#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.8 1995/03/10 04:44:54 bsmith Exp bsmith $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* Growth increment (number of entries) for the off-processor value stash. */
#define CHUNCKSIZE 100
/*
   This is a simple minded stash. Do a linear search to determine if
   in stash, if not add to end.

   The stash caches (row, column, value) triplets destined for rows
   owned by other processors; they are shipped out during assembly.
   stash->array, stash->idx and stash->idy live in a single MALLOC
   block: values first, then the two index arrays packed behind them.
*/
static int StashValues(Stash *stash,int row,int n, int *idxn,
                       Scalar *values,InsertMode addv)
{
  /* row    - global row index of all n entries being stashed
     n      - number of (column,value) pairs
     idxn   - global column indices
     values - the n values (consumed left to right)
     addv   - AddValues accumulates into an existing entry,
              otherwise the entry is overwritten.
     Returns 0 on success; macro-based error handling otherwise.  */
  int    i,j,N = stash->n,found,*n_idx, *n_idy;
  Scalar val,*n_array;

  for ( i=0; i<n; i++ ) {
    found = 0;
    val = *values++;
    /* O(N) scan for a previously stashed entry with the same (row,col).
       NOTE: N is the stash size at entry; entries appended inside this
       call are not re-searched, matching the original behavior. */
    for ( j=0; j<N; j++ ) {
      if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
        /* found a match */
        if (addv == AddValues) stash->array[j] += val;
        else stash->array[j] = val;
        found = 1;
        break;
      }
    }
    if (!found) { /* not found so add to end */
      if ( stash->n == stash->nmax ) {
        /* allocate a larger stash: one block holding the Scalar values
           followed by the two int index arrays (Scalar alignment is the
           strictest of the three, so placing it first is safe). */
        n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
                             2*sizeof(int) + sizeof(Scalar)));
        CHKPTR(n_array);
        n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
        n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
        MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
        MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
        MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
        /* idx/idy live inside the same allocation as array, so a single
           FREE releases all three. */
        if (stash->array) FREE(stash->array);
        stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
        stash->nmax += CHUNCKSIZE;
      }
      stash->array[stash->n] = val;
      stash->idx[stash->n]   = row;
      stash->idy[stash->n++] = idxn[i];
    }
  }
  return 0;
}

/*
   MatSetValues() implementation for the parallel AIJ format.

   Entries in locally owned rows go directly into the sequential
   sub-matrices: aij->A holds the "diagonal" block (columns owned by
   this processor, stored with LOCAL column indices) and aij->B the
   "off-diagonal" block (stored with GLOBAL column indices until
   MPIAIJSetUpMultiply compresses them). Entries for rows owned by
   other processors are cached in the stash and communicated during
   assembly. v is an m-by-n row-major array of values.
*/
static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Matimpiaij *aij = (Matimpiaij *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  /* all calls between assemblies must use the same insert mode */
  if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
    SETERR(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERR(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: convert to local row index */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERR(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block: local column index */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
        else {
          /* off-diagonal block only accepts entries before the first
             assembly (B's column map is frozen afterwards) */
          if (aij->assembled) {
            SETERR(1,"Cannot insert off diagonal block in already\
 assembled matrix. Contact petsc-maint@mcs.anl.gov\
 if your need this feature");
          }
          col = idxn[j];  /* global column index kept for B */
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
      }
    }
    else {
      /* row owned elsewhere: cache for communication at assembly time */
      ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
    }
  }
  return 0;
}

/*
    the assembly code is alot like the code for vectors, we should
  sometime derive a single assembly code that can be used for
  either case.
*/

/*
   Begins parallel assembly: ships the stashed off-processor values to
   their owning processors with nonblocking MPI point-to-point calls.

   Protocol:
     1. A bitwise-OR allreduce of the insert modes detects processors
        that mixed AddValues with InsertValues.
     2. Two allreduces tell each processor how many messages it will
        receive (nrecvs) and the largest message any processor sends to
        it (nmax entries).
     3. Each stash entry is packed as three Scalars
        (row, column, value) -- indices are stored as Scalars purely to
        keep the message a single homogeneous MPI_SCALAR buffer.
   Send/receive buffers and requests are parked in the Matimpiaij
   struct; MatiAIJEndAssemble completes the exchange.
*/
static int MatiAIJBeginAssemble(Mat mat)
{
  Matimpiaij  *aij = (Matimpiaij *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = 50, *owner,*starts,count;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (AddValues|InsertValues)) {
    SETERR(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor.
     nprocs[j] = number of stash entries bound for rank j,
     procs[j]  = 1 iff we send anything to rank j (shares nprocs' block),
     owner[i]  = destination rank of stash entry i. */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      /* owners[] holds the global row-ownership ranges */
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];   /* how many ranks send to me */
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];        /* largest incoming message, in entries */
  FREE(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    /* each slot is nmax triplets wide; actual length recovered later
       with MPI_Get_count in MatiAIJEndAssemble */
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack: note starts[] is consumed (post-incremented) by this loop */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  FREE(owner);
  /* rebuild starts[] since the packing loop destroyed it */
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts); FREE(nprocs);

  /* Free cache space */
  aij->stash.nmax = aij->stash.n = 0;
  if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}

  /* stash state for MatiAIJEndAssemble; svalues/rvalues must stay alive
     until the nonblocking operations complete there */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MPIAIJSetUpMultiply(Mat);

/*
   Completes the assembly begun in MatiAIJBeginAssemble: drains the
   posted receives, unpacks each (row, col, value) triplet into the
   local A (diagonal) or B (off-diagonal) block, waits for the sends to
   finish, then assembles the sequential sub-matrices. On the first
   assembly it also builds the scatter machinery for MatMult via
   MPIAIJSetUpMultiply.
*/
static int MatiAIJEndAssemble(Mat mat)
{
  int        ierr;
  Matimpiaij *aij = (Matimpiaij *) mat->data;

  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int         row,col;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space; slot imdex is 3*rmax wide */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPI_SCALAR,&n);
    n = n/3;  /* number of triplets actually received */
    for ( i=0; i<n; i++ ) {
      /* indices travel encoded as Scalars; PETSCREAL recovers them */
      row = (int) PETSCREAL(values[3*i])   - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;  /* diagonal block uses local columns */
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          SETERR(1,"Cannot insert off diagonal block in already\
 assembled matrix. Contact petsc-maint@mcs.anl.gov\
 if your need this feature");
        }
        /* B keeps global column indices until first assembly */
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  FREE(aij->recv_waits); FREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    FREE(send_status);
  }
  FREE(aij->send_waits); FREE(aij->svalues);

  aij->insertmode = NotSetValues;
  ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
  ierr = MatEndAssembly(aij->A); CHKERR(ierr);

  if (aij->assembled) {
    /* this is because the present implimentation doesn't support
       inserting values off the diagonal block.*/
    return 0;
  }

  /* first assembly: compress B's column space and build the scatter */
  ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
  ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
  ierr = MatEndAssembly(aij->B); CHKERR(ierr);

  aij->assembled = 1;
  return 0;
}

/* Zeros all entries of both the diagonal and off-diagonal blocks. */
static int MatiZero(Mat A)
{
  Matimpiaij *l = (Matimpiaij *) A->data;

  MatZeroEntries(l->A); MatZeroEntries(l->B);
  return 0;
}

/* again this uses the same basic stratagy as in the assembly and
   scatter create routines, we should try to do it systemamatically
   if we can figure out the proper level of generality. */

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row owerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
/*
   Zeros the (globally indexed) rows listed in 'is'. Row indices may
   refer to rows owned by any processor; each index is routed to its
   owner with the same count/allreduce/Isend/Irecv pattern used in
   assembly, then each processor zaps its local rows: diag is placed on
   the diagonal of the A block, the B block rows are zeroed outright.
*/
static int MatiZerorows(Mat A,IS is,Scalar *diag)
{
  Matimpiaij  *l = (Matimpiaij *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int         *rvalues,tag = 67,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
  ierr = ISGetIndices(is,&rows); CHKERR(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERR(1,"Imdex out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives:   */
  rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack row indices grouped by destination (consumes starts[]) */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] for the actual sends */
  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts);

  base = owners[mytid];  /* first globally owned row, for localization */

  /* wait on receives */
  lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
  source = lens + nrecvs;  /* source ranks share lens' allocation */
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  FREE(recv_waits);

  /* move the data into the send scatter: gather all received global
     row indices into one local (0-based) list */
  lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  FREE(rvalues); FREE(lens);
  FREE(owner); FREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
  ierr = ISDestroy(istmp); CHKERR(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    FREE(send_status);
  }
  FREE(send_waits); FREE(svalues);

  return 0;
}

/*
   yy = mat * xx. Overlaps communication and computation: the scatter
   of ghost values into aij->lvec is started, the diagonal block A is
   applied to the local part, the scatter is completed, and the
   off-diagonal block B contributes the ghost part.
*/
static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
  ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
  ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
  return 0;
}

/* zz = yy + mat * xx, with the same communication overlap as above. */
static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
  ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
  ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
  return 0;
}

/*
   yy = mat^T * xx. B^T xx produces contributions for OTHER processors'
   rows; they are sent back with a reversed scatter (with AddValues)
   while A^T is applied locally.
*/
static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;
  int        ierr;

  if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
                         ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
                       aij->Mvctx); CHKERR(ierr);
  return 0;
}

/* zz = yy + mat^T * xx; same reverse-scatter scheme as MatiAIJMultTrans. */
static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;
  int        ierr;

  if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
                         ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
                       aij->Mvctx); CHKERR(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatiAIJgetdiag(Mat Ain,Vec v)
{
  Matimpiaij *A = (Matimpiaij *) Ain->data;
  if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first");
  return MatGetDiagonal(A->A,v);
}

/* Destroys the parallel matrix: sub-matrices, ownership table, scatter
   context and local work vector (the latter two only if created). */
static int MatiAIJdestroy(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Matimpiaij *aij = (Matimpiaij *) mat->data;
  int        ierr;
  FREE(aij->rowners);  /* cowners shares this allocation (see create) */
  ierr = MatDestroy(aij->A); CHKERR(ierr);
  ierr = MatDestroy(aij->B); CHKERR(ierr);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
  FREE(aij); PETSCHEADERDESTROY(mat);
  return 0;
}

/* Prints ownership ranges and both sub-blocks, one processor at a time
   (serialized with MPE_Seq_begin/end). */
static int MatiView(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  Matimpiaij *aij = (Matimpiaij *) mat->data;
  int        ierr;

  if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
  MPE_Seq_begin(mat->comm,1);
  printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
         aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
         aij->cend);
  ierr = MatView(aij->A,0); CHKERR(ierr);
  ierr = MatView(aij->B,0); CHKERR(ierr);
  fflush(stdout);
  MPE_Seq_end(mat->comm,1);
  return 0;
}

extern int MatiAIJmarkdiag(Matiaij *);
/*
    This has to provide several versions.

    1) per sequential
    2) a) use only local smoothing updating outer values only once.
       b) local smoothing updating outer values each inner iteration
    3) color updating out values betwen colors.
*/
/*
   SOR/SSOR relaxation for the parallel AIJ matrix.

   Notes on the inner loops: A->i/A->j (and B->i/B->j) are CSR arrays
   using Fortran-style 1-based offsets, hence the pervasive "- 1"
   corrections and the shifted base pointers xs = x-1, ls = lvec-1.
   diag[i] is the (1-based) position of row i's diagonal in A->a.
   The parallel sweeps order the processors with VecPipelineBegin/End
   (PipelineDown = ranks below first, PipelineUp = the reverse); the
   SOR_LOCAL_* variants ignore inter-processor ordering entirely and
   only scatter ghost values once per call.
*/
static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift,
                        int its,Vec xx)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Matiaij    *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1;  /* shift by one for index start of 1 */
  ls--;
  if (!A->diag) {if ((ierr = MatiAIJmarkdiag(A))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERR(1,"That option not yet support for parallel AIJ matrices");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D; where L is lower trianglar,
       U is upper triangular, E is diagonal; This routine applies

              (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
    ierr = VecCreate(xx,&tt); CHKERR(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b : backward solve over the strict upper part */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;  /* entries strictly above diag */
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];  /* diagonal value (1-based) */
      n    = B->i[i+1] - B->i[i];      /* whole off-diagonal row */
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                          mat->Mvctx); CHKERR(ierr);

    /* t = b - (2*E - D)x  (diag is advanced destructively here) */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /* t = (E + L)^{-1}t : forward solve over the strict lower part */
    ts = t - 1;  /* shifted by one for index start of a or mat->j*/
    diag = A->diag;  /* reset after the destructive loop above */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];        /* entries strictly below diag */
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                          mat->Mvctx); CHKERR(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      /* bring current ghost values of xx into lvec */
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x == 0 only the strict lower triangle contributes */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x == 0 only the strict upper triangle contributes */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                             mat->Mvctx); CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                           mat->Mvctx); CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* ghost values are all zero: pure local relaxation suffices */
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* ghost values fetched once; NOT refreshed between iterations */
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                           mat->Mvctx); CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                         mat->Mvctx); CHKERR(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}
/* Forwards supported options to both sub-blocks; both branches below
   currently pass the option through unchanged. Column-oriented
   insertion is rejected for this format. */
static int MatiAIJinsopt(Mat aijin,int op)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
  return 0;
}

/* Reports the GLOBAL dimensions of the matrix. */
static int MatiAIJsize(Mat matin,int *m,int *n)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

/* Reports the LOCAL dimensions of this processor's portion. */
static int MatiAIJlocalsize(Mat matin,int *m,int *n)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

/* Reports the [rstart, rend) range of globally owned rows. */
static int MatiAIJrange(Mat matin,int *m,int *n)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatiCopy(Mat,Mat *);

/* -------------------------------------------------------------------*/
/* Virtual function table; zeros mark operations this format does not
   implement. Slot order is fixed by struct _MatOps. */
static struct _MatOps MatOps = {MatiAIJInsertValues,
       0, 0,
       MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd,
       0,0,0,0,
       0,0,
       MatiAIJrelax,
       0,
       0,0,0,
       MatiCopy,
       MatiAIJgetdiag,0,0,
       MatiAIJBeginAssemble,MatiAIJEndAssemble,
       0,
       MatiAIJinsopt,MatiZero,MatiZerorows,0,
       0,0,0,0,
       MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };


/*@

    MatCreateMPIAIJ - Creates a sparse parallel matrix
  in AIJ format.

  Input Parameters:
.   comm - MPI communicator
.   m,n - number of local rows and columns (or -1 to have calculated)
.   M,N - global rows and columns (or -1 to have calculated)
.   d_nz - total number nonzeros in diagonal portion of matrix
.   d_nzz - number of nonzeros per row in diagonal portion of matrix or null
.
      You must leave room for the diagonal entry even if it is zero.
.   o_nz - total number nonzeros in off-diagonal portion of matrix
.   o_nzz - number of nonzeros per row in off-diagonal portion of matrix
.       or null. You must have at least one nonzero per row.

  Output parameters:
.  newmat - the matrix

  Keywords: matrix, aij, compressed row, sparse, parallel
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Matimpiaij *aij;
  int        ierr, i,sum[2],work[2];
  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATAIJMPI,comm);
  mat->data    = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatiAIJdestroy;
  mat->view    = MatiView;
  mat->factor  = 0;
  mat->row     = 0;
  mat->col     = 0;

  mat->comm    = comm;
  aij->insertmode = NotSetValues;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* derive missing global sizes by summing the local ones */
  if (M == -1 || N == -1) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == -1) M = sum[0];
    if (N == -1) N = sum[1];
  }
  /* derive missing local sizes: even split, early ranks get the excess */
  if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships: one allocation holds
     rowners (numtids+1 prefix sums) followed by cowners */
  aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTR(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];  /* turn counts into offsets */
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];

  /* A: m x n diagonal block; B: m x N off-diagonal block (full global
     width until MPIAIJSetUpMultiply compresses its columns) */
  ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
  ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);

  /* build cache for off array entries formed */
  aij->stash.nmax  = CHUNCKSIZE; /* completely arbratray number */
  aij->stash.n     = 0;
  aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
                     sizeof(Scalar))); CHKPTR(aij->stash.array);
  aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
  aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);

  /* stuff used for matrix vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}

/*
   Duplicates an assembled parallel AIJ matrix, including its values,
   local work vector and scatter context.

   NOTE(review): unlike MatCreateMPIAIJ this allocates rowners with only
   numtids+1 ints and never initializes aij->cowners — presumably safe
   because cowners is only read during creation, but confirm nothing
   dereferences cowners on a copied matrix.
*/
static int MatiCopy(Mat matin,Mat *newmat)
{
  Mat        mat;
  Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
  int        ierr;
  *newmat = 0;

  if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATAIJMPI,matin->comm);
  mat->data    = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatiAIJdestroy;
  mat->view    = MatiView;
  mat->factor  = matin->factor;
  mat->row     = 0;
  mat->col     = 0;

  aij->m = oldmat->m;
  aij->n = oldmat->n;
  aij->M = oldmat->M;
  aij->N = oldmat->N;

  aij->assembled  = 1;
  aij->rstart     = oldmat->rstart;
  aij->rend       = oldmat->rend;
  aij->cstart     = oldmat->cstart;
  aij->cend       = oldmat->cend;
  aij->numtids    = oldmat->numtids;
  aij->mytid      = oldmat->mytid;
  aij->insertmode = NotSetValues;

  aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
  CHKPTR(aij->rowners);
  MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
  /* the copy starts with an empty stash */
  aij->stash.nmax = 0;
  aij->stash.n = 0;
  aij->stash.array= 0;
  mat->comm = matin->comm;

  ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
  ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
  ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
  *newmat = mat;
  return 0;
}