#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

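/*
   The man pages above recommend calling both preallocation routines so the same code runs
   unchanged on one or many MPI processes. A minimal usage sketch (the sizes and per-row
   nonzero counts here are illustrative assumptions, not taken from this file):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         ignored on multi-process communicators
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  ignored on single-process communicators
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/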
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia = a->i;
  ib = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
  PetscInt i,n,*garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  IS sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt rank,size;
  PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat mat;
  Mat_SeqAIJ *gmata;
  PetscMPIInt tag;
  MPI_Status status;
  PetscBool aij;
  MatScalar *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i = 0;
      nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

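/*
   A minimal calling sketch for the routine above, under the assumption that every rank passes
   the same Mat gmat (whose type on rank 0 is MATSEQAIJ) and its desired local row count m;
   the variable names are illustrative only:

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);    reuse the pattern, refresh the values
     ierr = MatDestroy(&dist);CHKERRQ(ierr);
*/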
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  PetscErrorCode ierr;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar *aa = a->a;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar *ba = b->a;

  PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

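/*
   The routine above stashes any entry whose row is owned by another process; the stash is
   drained during assembly. A minimal caller-side sketch (the row/column indices and value
   are illustrative assumptions):

     PetscInt    row = 0, col = 0;          may be owned by another process
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    communicates stashed off-process entries
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/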
/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat A = aij->A;  /* diagonal part of the matrix */
  Mat B = aij->B;  /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt *ailen = a->ilen,*aj = a->j;
  PetscInt *bilen = b->ilen,*bj = b->j;
  PetscInt am = aij->A->rmap->n,j;
  PetscInt diag_so_far = 0,dnz;
  PetscInt offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[],
                                                     const PetscInt full_diag_i[],const PetscInt full_offd_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat A = aij->A;  /* diagonal part of the matrix */
  Mat B = aij->B;  /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen,*aj = a->j;
  PetscInt *bilen = b->ilen,*bj = b->j;
  PetscInt am = aij->A->rmap->n,j;
  PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      rowstart_offd = full_offd_i[j];
      rowstart_diag = full_diag_i[j];
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt n;
  PetscInt i,j,rstart,ncols,flg;
  PetscInt *row,*col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

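/*
   Caller-side sketch of the routine above: zero a few global rows, put "diag" on their
   diagonal, and fix the right-hand side so the zeroed unknowns keep the values in x.
   The row list below is an illustrative assumption:

     PetscInt rows[] = {0, 7};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);    rows may be owned by any process
*/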
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt n = A->rmap->n;
  PetscInt i,j,r,m,p = 0,len = 0;
  PetscInt *lrows,*owners = A->rmap->range;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb,*mask;
  Vec xmask,lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt *aj, *ii,*ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS Me,Notme;
  PetscErrorCode ierr;
  PetscInt M,N,first,last,*notme,i;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

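/*
   The routine below writes the matrix in the PETSc binary format; summarizing the code that
   follows (this note is a reading aid, not a format specification):

     header[4]       {MAT_FILE_CLASSID, global rows, global cols, global nonzeros}
     row lengths     one PetscInt per global row
     column indices  all nonzeros, row by row, in global numbering
     values          all nonzeros, row by row, as PetscScalar

   Process 0 writes its own data first and then receives and writes the data of the other
   processes, throttled by the PetscViewerFlowControl routines.
*/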
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
  int fd;
  PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar *column_values;
  PetscInt message_count,flowcontrolcount;
  FILE *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this process needs as much space as the largest amount any process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt rank = aij->rank,size = aij->size;
  PetscBool isdraw,iascii,isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
routines\n");CHKERRQ(ierr); 1427 } 1428 PetscFunctionReturn(0); 1429 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1430 PetscFunctionReturn(0); 1431 } 1432 } else if (isbinary) { 1433 if (size == 1) { 1434 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1435 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1436 } else { 1437 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1438 } 1439 PetscFunctionReturn(0); 1440 } else if (isdraw) { 1441 PetscDraw draw; 1442 PetscBool isnull; 1443 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1444 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1445 if (isnull) PetscFunctionReturn(0); 1446 } 1447 1448 { 1449 /* assemble the entire matrix onto first processor. */ 1450 Mat A; 1451 Mat_SeqAIJ *Aloc; 1452 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1453 MatScalar *a; 1454 1455 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1456 if (!rank) { 1457 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1458 } else { 1459 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1460 } 1461 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1462 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1463 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1464 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1465 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1466 1467 /* copy over the A part */ 1468 Aloc = (Mat_SeqAIJ*)aij->A->data; 1469 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1470 row = mat->rmap->rstart; 1471 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1472 for (i=0; i<m; i++) { 1473 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1474 row++; 1475 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1476 } 1477 aj = Aloc->j; 1478 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1479 1480 /* copy over the B part */ 1481 Aloc = (Mat_SeqAIJ*)aij->B->data; 1482 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1483 row = mat->rmap->rstart; 1484 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1485 ct = cols; 1486 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1487 for (i=0; i<m; i++) { 1488 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1489 row++; 1490 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1491 } 1492 ierr = PetscFree(ct);CHKERRQ(ierr); 1493 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1494 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1495 /* 1496 Everyone has to call to draw the matrix since the graphics waits are 1497 synchronized across all processors that share the PetscDraw object 1498 */ 1499 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1500 if (!rank) { 1501 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1502 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = 0; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i],rowner; 1680 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1681 for (j=ai[i]; j<ai[i+1]; j++) { 1682 PetscInt cowner,col = cdest[aj[j]]; 1683 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1684 if (rowner == cowner) dnnz[i]++; 1685 else onnz[i]++; 1686 } 1687 for (j=bi[i]; j<bi[i+1]; j++) { 1688 PetscInt cowner,col = gcdest[bj[j]]; 1689 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1690 if (rowner == cowner) dnnz[i]++; 1691 else onnz[i]++; 1692 } 1693 } 1694 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1695 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1696 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1697 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1698 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1699 1700 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1701 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1702 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1703 for (i=0; i<m; i++) { 1704 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1705 PetscInt j0,rowlen; 1706 rowlen = ai[i+1] - ai[i]; 1707 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1708 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1709 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1710 } 1711 rowlen = bi[i+1] - bi[i]; 1712 for (j0=j=0; j<rowlen; j0=j) { 1713 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1714 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 } 1717 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1718 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1720 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1721 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1722 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1723 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1724 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1725 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1726 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1727 *B = Aperm; 1728 PetscFunctionReturn(0); 1729 } 1730 1731 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1732 { 1733 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1734 PetscErrorCode ierr; 1735 1736 PetscFunctionBegin; 1737 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1738 if (ghosts) *ghosts = aij->garray; 1739 PetscFunctionReturn(0); 1740 } 
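/*
   A minimal usage sketch for the ghost query implemented above. This is an
   illustration only, not part of the routine: mat is assumed to be an assembled
   MATMPIAIJ matrix, and the variable names are placeholders.

     PetscInt       i,nghosts;
     const PetscInt *ghosts;

     ierr = MatGetGhosts(mat,&nghosts,&ghosts);CHKERRQ(ierr);
     for (i=0; i<nghosts; i++) {
       ierr = PetscPrintf(PETSC_COMM_SELF,"ghost column %D is global column %D\n",i,ghosts[i]);CHKERRQ(ierr);
     }

   The returned array lists the global column indices of the off-diagonal
   (off-process) part, in the order used by the local work vector during MatMult().
*/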
1741 1742 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1743 { 1744 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1745 Mat A = mat->A,B = mat->B; 1746 PetscErrorCode ierr; 1747 PetscReal isend[5],irecv[5]; 1748 1749 PetscFunctionBegin; 1750 info->block_size = 1.0; 1751 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1752 1753 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1754 isend[3] = info->memory; isend[4] = info->mallocs; 1755 1756 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1757 1758 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1759 isend[3] += info->memory; isend[4] += info->mallocs; 1760 if (flag == MAT_LOCAL) { 1761 info->nz_used = isend[0]; 1762 info->nz_allocated = isend[1]; 1763 info->nz_unneeded = isend[2]; 1764 info->memory = isend[3]; 1765 info->mallocs = isend[4]; 1766 } else if (flag == MAT_GLOBAL_MAX) { 1767 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1768 1769 info->nz_used = irecv[0]; 1770 info->nz_allocated = irecv[1]; 1771 info->nz_unneeded = irecv[2]; 1772 info->memory = irecv[3]; 1773 info->mallocs = irecv[4]; 1774 } else if (flag == MAT_GLOBAL_SUM) { 1775 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1776 1777 info->nz_used = irecv[0]; 1778 info->nz_allocated = irecv[1]; 1779 info->nz_unneeded = irecv[2]; 1780 info->memory = irecv[3]; 1781 info->mallocs = irecv[4]; 1782 } 1783 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1784 info->fill_ratio_needed = 0; 1785 info->factor_mallocs = 0; 1786 PetscFunctionReturn(0); 1787 } 1788 1789 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1790 { 1791 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1792 PetscErrorCode ierr; 1793 1794 PetscFunctionBegin; 1795 switch (op) { 1796 case MAT_NEW_NONZERO_LOCATIONS: 1797 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1798 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1799 case MAT_KEEP_NONZERO_PATTERN: 1800 case MAT_NEW_NONZERO_LOCATION_ERR: 1801 case MAT_USE_INODES: 1802 case MAT_IGNORE_ZERO_ENTRIES: 1803 MatCheckPreallocated(A,1); 1804 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1805 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1806 break; 1807 case MAT_ROW_ORIENTED: 1808 MatCheckPreallocated(A,1); 1809 a->roworiented = flg; 1810 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_NEW_DIAGONALS: 1815 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1816 break; 1817 case MAT_IGNORE_OFF_PROC_ENTRIES: 1818 a->donotstash = flg; 1819 break; 1820 case MAT_SPD: 1821 A->spd_set = PETSC_TRUE; 1822 A->spd = flg; 1823 if (flg) { 1824 A->symmetric = PETSC_TRUE; 1825 A->structurally_symmetric = PETSC_TRUE; 1826 A->symmetric_set = PETSC_TRUE; 1827 A->structurally_symmetric_set = PETSC_TRUE; 1828 } 1829 break; 1830 case MAT_SYMMETRIC: 1831 MatCheckPreallocated(A,1); 1832 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1833 break; 1834 case MAT_STRUCTURALLY_SYMMETRIC: 1835 MatCheckPreallocated(A,1); 1836 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1837 break; 1838 case MAT_HERMITIAN: 1839 MatCheckPreallocated(A,1); 1840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1841 break; 1842 case MAT_SYMMETRY_ETERNAL: 1843 MatCheckPreallocated(A,1); 1844 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1845 break; 1846 case MAT_SUBMAT_SINGLEIS: 
1847 A->submat_singleis = flg; 1848 break; 1849 case MAT_STRUCTURE_ONLY: 1850 /* The option is handled directly by MatSetOption() */ 1851 break; 1852 default: 1853 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1854 } 1855 PetscFunctionReturn(0); 1856 } 1857 1858 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1859 { 1860 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1861 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1862 PetscErrorCode ierr; 1863 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1864 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1865 PetscInt *cmap,*idx_p; 1866 1867 PetscFunctionBegin; 1868 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1869 mat->getrowactive = PETSC_TRUE; 1870 1871 if (!mat->rowvalues && (idx || v)) { 1872 /* 1873 allocate enough space to hold information from the longest row. 1874 */ 1875 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1876 PetscInt max = 1,tmp; 1877 for (i=0; i<matin->rmap->n; i++) { 1878 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1879 if (max < tmp) max = tmp; 1880 } 1881 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1882 } 1883 1884 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1885 lrow = row - rstart; 1886 1887 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1888 if (!v) {pvA = 0; pvB = 0;} 1889 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1890 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1891 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1892 nztot = nzA + nzB; 1893 1894 cmap = mat->garray; 1895 if (v || idx) { 1896 if (nztot) { 1897 /* Sort by increasing column numbers, assuming A and B already sorted */ 1898 PetscInt imark = -1; 1899 if (v) { 1900 *v = v_p = mat->rowvalues; 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1903 else break; 1904 } 1905 imark = i; 1906 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1907 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1908 } 1909 if (idx) { 1910 *idx = idx_p = mat->rowindices; 1911 if (imark > -1) { 1912 for (i=0; i<imark; i++) { 1913 idx_p[i] = cmap[cworkB[i]]; 1914 } 1915 } else { 1916 for (i=0; i<nzB; i++) { 1917 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1918 else break; 1919 } 1920 imark = i; 1921 } 1922 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1923 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1924 } 1925 } else { 1926 if (idx) *idx = 0; 1927 if (v) *v = 0; 1928 } 1929 } 1930 *nz = nztot; 1931 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1932 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1933 PetscFunctionReturn(0); 1934 } 1935 1936 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1937 { 1938 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1939 1940 PetscFunctionBegin; 1941 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1942 aij->getrowactive = PETSC_FALSE; 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1947 { 1948 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1949 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = 
(Mat_SeqAIJ*)aij->B->data; 1950 PetscErrorCode ierr; 1951 PetscInt i,j,cstart = mat->cmap->rstart; 1952 PetscReal sum = 0.0; 1953 MatScalar *v; 1954 1955 PetscFunctionBegin; 1956 if (aij->size == 1) { 1957 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1958 } else { 1959 if (type == NORM_FROBENIUS) { 1960 v = amat->a; 1961 for (i=0; i<amat->nz; i++) { 1962 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1963 } 1964 v = bmat->a; 1965 for (i=0; i<bmat->nz; i++) { 1966 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1967 } 1968 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1969 *norm = PetscSqrtReal(*norm); 1970 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1971 } else if (type == NORM_1) { /* max column norm */ 1972 PetscReal *tmp,*tmp2; 1973 PetscInt *jj,*garray = aij->garray; 1974 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1975 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1976 *norm = 0.0; 1977 v = amat->a; jj = amat->j; 1978 for (j=0; j<amat->nz; j++) { 1979 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1980 } 1981 v = bmat->a; jj = bmat->j; 1982 for (j=0; j<bmat->nz; j++) { 1983 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1984 } 1985 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1986 for (j=0; j<mat->cmap->N; j++) { 1987 if (tmp2[j] > *norm) *norm = tmp2[j]; 1988 } 1989 ierr = PetscFree(tmp);CHKERRQ(ierr); 1990 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1991 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1992 } else if (type == NORM_INFINITY) { /* max row norm */ 1993 PetscReal ntemp = 0.0; 1994 for (j=0; j<aij->A->rmap->n; j++) { 1995 v = amat->a + amat->i[j]; 1996 sum = 0.0; 1997 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1998 sum += PetscAbsScalar(*v); v++; 1999 } 2000 v = bmat->a + bmat->i[j]; 2001 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2002 sum += PetscAbsScalar(*v); v++; 2003 } 2004 if (sum > ntemp) ntemp = sum; 2005 } 2006 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2007 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2008 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2014 { 2015 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2016 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2017 PetscErrorCode ierr; 2018 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2019 PetscInt cstart = A->cmap->rstart,ncol; 2020 Mat B; 2021 MatScalar *array; 2022 2023 PetscFunctionBegin; 2024 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2025 ai = Aloc->i; aj = Aloc->j; 2026 bi = Bloc->i; bj = Bloc->j; 2027 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2028 PetscInt *d_nnz,*g_nnz,*o_nnz; 2029 PetscSFNode *oloc; 2030 PETSC_UNUSED PetscSF sf; 2031 2032 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2033 /* compute d_nnz for preallocation */ 2034 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2035 for (i=0; i<ai[ma]; i++) { 2036 d_nnz[aj[i]]++; 2037 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2038 } 2039 /* compute local off-diagonal contributions */ 2040 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2041 
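/* Each entry of the local off-diagonal block with ghost column k becomes, in the
     transpose, an entry of a row owned by another process; g_nnz[k] counts those
     contributions per ghost column, and the star-forest reduction below sums them
     onto the owning processes (through garray and A->cmap) to form the off-diagonal
     preallocation o_nnz. */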
for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2042 /* map those to global */ 2043 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2044 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2045 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2046 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2047 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2048 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2049 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2050 2051 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2052 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2053 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2054 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2055 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2056 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2057 } else { 2058 B = *matout; 2059 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2060 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2061 } 2062 2063 /* copy over the A part */ 2064 array = Aloc->a; 2065 row = A->rmap->rstart; 2066 for (i=0; i<ma; i++) { 2067 ncol = ai[i+1]-ai[i]; 2068 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2069 row++; 2070 array += ncol; aj += ncol; 2071 } 2072 aj = Aloc->j; 2073 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2074 2075 /* copy over the B part */ 2076 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2077 array = Bloc->a; 2078 row = A->rmap->rstart; 2079 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2080 cols_tmp = cols; 2081 for (i=0; i<mb; i++) { 2082 ncol = bi[i+1]-bi[i]; 2083 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2084 row++; 2085 array += ncol; cols_tmp += ncol; 2086 } 2087 ierr = PetscFree(cols);CHKERRQ(ierr); 2088 2089 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2092 *matout = B; 2093 } else { 2094 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2095 } 2096 PetscFunctionReturn(0); 2097 } 2098 2099 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2100 { 2101 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2102 Mat a = aij->A,b = aij->B; 2103 PetscErrorCode ierr; 2104 PetscInt s1,s2,s3; 2105 2106 PetscFunctionBegin; 2107 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2108 if (rr) { 2109 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2110 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2111 /* Overlap communication with computation. 
*/ 2112 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 } 2114 if (ll) { 2115 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2116 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2117 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2118 } 2119 /* scale the diagonal block */ 2120 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2121 2122 if (rr) { 2123 /* Do a scatter end and then right scale the off-diagonal block */ 2124 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2125 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2126 } 2127 PetscFunctionReturn(0); 2128 } 2129 2130 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2131 { 2132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2137 PetscFunctionReturn(0); 2138 } 2139 2140 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2141 { 2142 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2143 Mat a,b,c,d; 2144 PetscBool flg; 2145 PetscErrorCode ierr; 2146 2147 PetscFunctionBegin; 2148 a = matA->A; b = matA->B; 2149 c = matB->A; d = matB->B; 2150 2151 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2152 if (flg) { 2153 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2154 } 2155 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2160 { 2161 PetscErrorCode ierr; 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2163 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2164 2165 PetscFunctionBegin; 2166 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2167 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2168 /* because of the column compression in the off-processor part of the matrix a->B, 2169 the number of columns in a->B and b->B may be different, hence we cannot call 2170 the MatCopy() directly on the two parts. If need be, we can provide a more 2171 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2172 then copying the submatrices */ 2173 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2174 } else { 2175 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2176 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2177 } 2178 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2183 { 2184 PetscErrorCode ierr; 2185 2186 PetscFunctionBegin; 2187 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2188 PetscFunctionReturn(0); 2189 } 2190 2191 /* 2192 Computes the number of nonzeros per row needed for preallocation when X and Y 2193 have different nonzero structure. 
2194 */ 2195 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2196 { 2197 PetscInt i,j,k,nzx,nzy; 2198 2199 PetscFunctionBegin; 2200 /* Set the number of nonzeros in the new matrix */ 2201 for (i=0; i<m; i++) { 2202 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2203 nzx = xi[i+1] - xi[i]; 2204 nzy = yi[i+1] - yi[i]; 2205 nnz[i] = 0; 2206 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2207 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2208 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2209 nnz[i]++; 2210 } 2211 for (; k<nzy; k++) nnz[i]++; 2212 } 2213 PetscFunctionReturn(0); 2214 } 2215 2216 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2217 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2218 { 2219 PetscErrorCode ierr; 2220 PetscInt m = Y->rmap->N; 2221 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2222 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2223 2224 PetscFunctionBegin; 2225 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2226 PetscFunctionReturn(0); 2227 } 2228 2229 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2230 { 2231 PetscErrorCode ierr; 2232 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2233 PetscBLASInt bnz,one=1; 2234 Mat_SeqAIJ *x,*y; 2235 2236 PetscFunctionBegin; 2237 if (str == SAME_NONZERO_PATTERN) { 2238 PetscScalar alpha = a; 2239 x = (Mat_SeqAIJ*)xx->A->data; 2240 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2241 y = (Mat_SeqAIJ*)yy->A->data; 2242 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2243 x = (Mat_SeqAIJ*)xx->B->data; 2244 y = (Mat_SeqAIJ*)yy->B->data; 2245 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2246 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2247 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2248 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2249 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2250 } else { 2251 Mat B; 2252 PetscInt *nnz_d,*nnz_o; 2253 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2254 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2255 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2256 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2257 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2258 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2259 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2260 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2261 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2262 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2263 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2264 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2265 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2266 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2267 } 2268 PetscFunctionReturn(0); 2269 } 2270 2271 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2272 2273 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2274 { 2275 #if defined(PETSC_USE_COMPLEX) 2276 PetscErrorCode ierr; 2277 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2278 2279 PetscFunctionBegin; 2280 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2281 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2282 #else 2283 PetscFunctionBegin; 2284 #endif 2285 PetscFunctionReturn(0); 2286 } 2287 2288 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2289 { 2290 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2291 PetscErrorCode ierr; 2292 2293 PetscFunctionBegin; 2294 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2295 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2296 PetscFunctionReturn(0); 2297 } 2298 2299 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2300 { 2301 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2302 PetscErrorCode ierr; 2303 2304 PetscFunctionBegin; 2305 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2306 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2307 PetscFunctionReturn(0); 2308 } 2309 2310 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2311 { 2312 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2313 PetscErrorCode ierr; 2314 PetscInt i,*idxb = 0; 2315 PetscScalar *va,*vb; 2316 Vec vtmp; 2317 2318 PetscFunctionBegin; 2319 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2320 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2321 if (idx) { 2322 for (i=0; i<A->rmap->n; i++) { 2323 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2324 } 2325 } 2326 2327 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2328 if (idx) { 2329 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2330 } 2331 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2332 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2333 2334 for (i=0; i<A->rmap->n; i++) { 2335 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2336 va[i] = vb[i]; 2337 if (idx) idx[i] = a->garray[idxb[i]]; 2338 } 2339 } 2340 2341 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2342 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2343 ierr = PetscFree(idxb);CHKERRQ(ierr); 2344 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2345 PetscFunctionReturn(0); 2346 } 2347 2348 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2349 { 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2351 PetscErrorCode ierr; 2352 PetscInt i,*idxb = 0; 2353 PetscScalar *va,*vb; 2354 Vec vtmp; 2355 2356 PetscFunctionBegin; 2357 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2358 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2359 if (idx) { 2360 for (i=0; i<A->cmap->n; i++) { 2361 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2362 } 2363 } 2364 2365 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2366 if (idx) { 2367 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2368 } 2369 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2370 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2371 2372 for (i=0; i<A->rmap->n; i++) { 2373 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2374 va[i] = vb[i]; 2375 if (idx) idx[i] = a->garray[idxb[i]]; 2376 } 2377 } 2378 2379 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2381 ierr = PetscFree(idxb);CHKERRQ(ierr); 2382 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2383 PetscFunctionReturn(0); 2384 } 2385 2386 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2387 { 2388 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2389 PetscInt n = A->rmap->n; 2390 PetscInt cstart = A->cmap->rstart; 2391 PetscInt *cmap = mat->garray; 2392 PetscInt *diagIdx, *offdiagIdx; 2393 Vec diagV, offdiagV; 2394 PetscScalar *a, *diagA, *offdiagA; 2395 PetscInt r; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 
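/* Row minima are computed independently for the diagonal (A) and off-diagonal (B)
     blocks into two sequential work vectors; the two per-row candidates are then
     merged below, and the winning column index is mapped back to global numbering
     (cstart + local index for the diagonal block, garray[] for the off-diagonal
     block). */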
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2400 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2401 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2402 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2403 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2404 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2405 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2406 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2407 for (r = 0; r < n; ++r) { 2408 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2409 a[r] = diagA[r]; 2410 idx[r] = cstart + diagIdx[r]; 2411 } else { 2412 a[r] = offdiagA[r]; 2413 idx[r] = cmap[offdiagIdx[r]]; 2414 } 2415 } 2416 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2417 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2418 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2419 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2420 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2421 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2426 { 2427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2428 PetscInt n = A->rmap->n; 2429 PetscInt cstart = A->cmap->rstart; 2430 PetscInt *cmap = mat->garray; 2431 PetscInt *diagIdx, *offdiagIdx; 2432 Vec diagV, offdiagV; 2433 PetscScalar *a, *diagA, *offdiagA; 2434 PetscInt r; 2435 PetscErrorCode ierr; 2436 2437 PetscFunctionBegin; 2438 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2439 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2440 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2441 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2442 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2443 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2444 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2445 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2446 for (r = 0; r < n; ++r) { 2447 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2448 a[r] = diagA[r]; 2449 idx[r] = cstart + diagIdx[r]; 2450 } else { 2451 a[r] = offdiagA[r]; 2452 idx[r] = cmap[offdiagIdx[r]]; 2453 } 2454 } 2455 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2456 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2457 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2458 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2459 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2460 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2465 { 2466 PetscErrorCode ierr; 2467 Mat *dummy; 2468 2469 PetscFunctionBegin; 2470 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2471 *newmat = *dummy; 2472 ierr = PetscFree(dummy);CHKERRQ(ierr); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2477 { 2478 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2479 PetscErrorCode ierr; 2480 2481 PetscFunctionBegin; 2482 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2483 A->factorerrortype = a->A->factorerrortype; 2484 PetscFunctionReturn(0); 2485 } 2486 2487 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2488 { 2489 PetscErrorCode ierr; 2490 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2491 2492 PetscFunctionBegin; 2493 
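/* Set random entries in the local diagonal (A) and off-diagonal (B) blocks, then
     re-assemble the parallel matrix. */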
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2494 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2495 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2496 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2501 { 2502 PetscFunctionBegin; 2503 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2504 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2505 PetscFunctionReturn(0); 2506 } 2507 2508 /*@ 2509 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2510 2511 Collective on Mat 2512 2513 Input Parameters: 2514 + A - the matrix 2515 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2516 2517 Level: advanced 2518 2519 @*/ 2520 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2521 { 2522 PetscErrorCode ierr; 2523 2524 PetscFunctionBegin; 2525 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2526 PetscFunctionReturn(0); 2527 } 2528 2529 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2530 { 2531 PetscErrorCode ierr; 2532 PetscBool sc = PETSC_FALSE,flg; 2533 2534 PetscFunctionBegin; 2535 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2536 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2537 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2538 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2539 if (flg) { 2540 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2541 } 2542 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2547 { 2548 PetscErrorCode ierr; 2549 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2550 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2551 2552 PetscFunctionBegin; 2553 if (!Y->preallocated) { 2554 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2555 } else if (!aij->nz) { 2556 PetscInt nonew = aij->nonew; 2557 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2558 aij->nonew = nonew; 2559 } 2560 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2565 { 2566 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2567 PetscErrorCode ierr; 2568 2569 PetscFunctionBegin; 2570 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2571 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2572 if (d) { 2573 PetscInt rstart; 2574 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2575 *d += rstart; 2576 2577 } 2578 PetscFunctionReturn(0); 2579 } 2580 2581 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 0, 2591 0, 2592 0, 2593 /*10*/ 0, 2594 0, 2595 0, 2596 MatSOR_MPIAIJ, 2597 MatTranspose_MPIAIJ, 2598 /*15*/ MatGetInfo_MPIAIJ, 2599 MatEqual_MPIAIJ, 2600 
MatGetDiagonal_MPIAIJ, 2601 MatDiagonalScale_MPIAIJ, 2602 MatNorm_MPIAIJ, 2603 /*20*/ MatAssemblyBegin_MPIAIJ, 2604 MatAssemblyEnd_MPIAIJ, 2605 MatSetOption_MPIAIJ, 2606 MatZeroEntries_MPIAIJ, 2607 /*24*/ MatZeroRows_MPIAIJ, 2608 0, 2609 0, 2610 0, 2611 0, 2612 /*29*/ MatSetUp_MPIAIJ, 2613 0, 2614 0, 2615 MatGetDiagonalBlock_MPIAIJ, 2616 0, 2617 /*34*/ MatDuplicate_MPIAIJ, 2618 0, 2619 0, 2620 0, 2621 0, 2622 /*39*/ MatAXPY_MPIAIJ, 2623 MatCreateSubMatrices_MPIAIJ, 2624 MatIncreaseOverlap_MPIAIJ, 2625 MatGetValues_MPIAIJ, 2626 MatCopy_MPIAIJ, 2627 /*44*/ MatGetRowMax_MPIAIJ, 2628 MatScale_MPIAIJ, 2629 MatShift_MPIAIJ, 2630 MatDiagonalSet_MPIAIJ, 2631 MatZeroRowsColumns_MPIAIJ, 2632 /*49*/ MatSetRandom_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*54*/ MatFDColoringCreate_MPIXAIJ, 2638 0, 2639 MatSetUnfactored_MPIAIJ, 2640 MatPermute_MPIAIJ, 2641 0, 2642 /*59*/ MatCreateSubMatrix_MPIAIJ, 2643 MatDestroy_MPIAIJ, 2644 MatView_MPIAIJ, 2645 0, 2646 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2647 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2648 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2649 0, 2650 0, 2651 0, 2652 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2653 MatGetRowMinAbs_MPIAIJ, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*75*/ MatFDColoringApply_AIJ, 2659 MatSetFromOptions_MPIAIJ, 2660 0, 2661 0, 2662 MatFindZeroDiagonals_MPIAIJ, 2663 /*80*/ 0, 2664 0, 2665 0, 2666 /*83*/ MatLoad_MPIAIJ, 2667 MatIsSymmetric_MPIAIJ, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2673 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2674 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2675 MatPtAP_MPIAIJ_MPIAIJ, 2676 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2677 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /*99*/ 0, 2683 0, 2684 0, 2685 MatConjugate_MPIAIJ, 2686 0, 2687 /*104*/MatSetValuesRow_MPIAIJ, 2688 MatRealPart_MPIAIJ, 2689 MatImaginaryPart_MPIAIJ, 2690 0, 2691 0, 2692 /*109*/0, 2693 0, 2694 MatGetRowMin_MPIAIJ, 2695 0, 2696 MatMissingDiagonal_MPIAIJ, 2697 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2698 0, 2699 MatGetGhosts_MPIAIJ, 2700 0, 2701 0, 2702 /*119*/0, 2703 0, 2704 0, 2705 0, 2706 MatGetMultiProcBlock_MPIAIJ, 2707 /*124*/MatFindNonzeroRows_MPIAIJ, 2708 MatGetColumnNorms_MPIAIJ, 2709 MatInvertBlockDiagonal_MPIAIJ, 2710 0, 2711 MatCreateSubMatricesMPI_MPIAIJ, 2712 /*129*/0, 2713 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2714 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2715 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2716 0, 2717 /*134*/0, 2718 0, 2719 MatRARt_MPIAIJ_MPIAIJ, 2720 0, 2721 0, 2722 /*139*/MatSetBlockSizes_MPIAIJ, 2723 0, 2724 0, 2725 MatFDColoringSetUp_MPIXAIJ, 2726 MatFindOffBlockDiagonalEntries_MPIAIJ, 2727 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2728 }; 2729 2730 /* ----------------------------------------------------------------------------------------*/ 2731 2732 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2733 { 2734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2735 PetscErrorCode ierr; 2736 2737 PetscFunctionBegin; 2738 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2739 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2744 { 2745 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2746 PetscErrorCode ierr; 2747 2748 PetscFunctionBegin; 2749 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2750 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2751 PetscFunctionReturn(0); 2752 } 2753 2754 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2755 { 2756 Mat_MPIAIJ *b; 2757 PetscErrorCode ierr; 2758 2759 PetscFunctionBegin; 2760 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2761 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2762 b = (Mat_MPIAIJ*)B->data; 2763 2764 #if defined(PETSC_USE_CTABLE) 2765 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2766 #else 2767 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2768 #endif 2769 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2770 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2771 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2772 2773 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2774 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2775 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2776 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2777 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2778 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2779 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2780 2781 if (!B->preallocated) { 2782 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2783 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2784 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2785 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2786 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2787 } 2788 2789 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2790 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2791 B->preallocated = PETSC_TRUE; 2792 B->was_assembled = PETSC_FALSE; 2793 B->assembled = PETSC_FALSE;; 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2798 { 2799 Mat_MPIAIJ *b; 2800 PetscErrorCode ierr; 2801 2802 PetscFunctionBegin; 2803 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2804 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2805 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2806 b = (Mat_MPIAIJ*)B->data; 2807 2808 #if defined(PETSC_USE_CTABLE) 2809 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2810 #else 2811 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2812 #endif 2813 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2814 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2815 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2816 2817 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2818 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2819 B->preallocated = PETSC_TRUE; 2820 B->was_assembled = PETSC_FALSE; 2821 B->assembled = PETSC_FALSE; 2822 PetscFunctionReturn(0); 2823 } 2824 2825 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2826 { 2827 Mat mat; 2828 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2829 PetscErrorCode ierr; 2830 2831 PetscFunctionBegin; 2832 *newmat = 0; 2833 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2834 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2835 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2836 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2837 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2838 a = (Mat_MPIAIJ*)mat->data; 2839 2840 mat->factortype = matin->factortype; 2841 mat->assembled = PETSC_TRUE; 2842 mat->insertmode = NOT_SET_VALUES; 2843 mat->preallocated = PETSC_TRUE; 2844 2845 a->size = oldmat->size; 2846 a->rank = oldmat->rank; 2847 a->donotstash 
= oldmat->donotstash; 2848 a->roworiented = oldmat->roworiented; 2849 a->rowindices = 0; 2850 a->rowvalues = 0; 2851 a->getrowactive = PETSC_FALSE; 2852 2853 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2854 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2855 2856 if (oldmat->colmap) { 2857 #if defined(PETSC_USE_CTABLE) 2858 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2859 #else 2860 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2861 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2862 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2863 #endif 2864 } else a->colmap = 0; 2865 if (oldmat->garray) { 2866 PetscInt len; 2867 len = oldmat->B->cmap->n; 2868 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2869 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2870 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2871 } else a->garray = 0; 2872 2873 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2874 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2875 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2876 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2877 2878 if (oldmat->Mvctx_mpi1) { 2879 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2880 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2881 } 2882 2883 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2884 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2885 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2887 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2888 *newmat = mat; 2889 PetscFunctionReturn(0); 2890 } 2891 2892 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2893 { 2894 PetscScalar *vals,*svals; 2895 MPI_Comm comm; 2896 PetscErrorCode ierr; 2897 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2898 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2899 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2900 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2901 PetscInt cend,cstart,n,*rowners; 2902 int fd; 2903 PetscInt bs = newMat->rmap->bs; 2904 2905 PetscFunctionBegin; 2906 /* force binary viewer to load .info file if it has not yet done so */ 2907 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2908 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2909 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2910 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2911 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2912 if (!rank) { 2913 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2914 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2915 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2916 } 2917 2918 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2919 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2920 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2921 if (bs < 0) bs = 1; 2922 2923 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2924 M = header[1]; N = header[2]; 2925 2926 /* If global sizes are set, check if they are consistent with that given in the file */ 2927 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2928 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2929 2930 /* determine ownership of all (block) rows */ 2931 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2932 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2933 else m = newMat->rmap->n; /* Set by user */ 2934 2935 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2936 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2937 2938 /* First process needs enough room for process with most rows */ 2939 if (!rank) { 2940 mmax = rowners[1]; 2941 for (i=2; i<=size; i++) { 2942 mmax = PetscMax(mmax, rowners[i]); 2943 } 2944 } else mmax = -1; /* unused, but compilers complain */ 2945 2946 rowners[0] = 0; 2947 for (i=2; i<=size; i++) { 2948 rowners[i] += rowners[i-1]; 2949 } 2950 rstart = rowners[rank]; 2951 rend = rowners[rank+1]; 2952 2953 /* distribute row lengths to all processors */ 2954 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2955 if (!rank) { 2956 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2957 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2958 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2959 for (j=0; j<m; j++) { 2960 procsnz[0] += ourlens[j]; 2961 } 2962 for (i=1; i<size; i++) { 2963 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2964 /* calculate the number of nonzeros on each processor */ 2965 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2966 procsnz[i] += rowlengths[j]; 2967 } 2968 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2969 } 2970 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2971 } else { 2972 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2973 } 2974 2975 if (!rank) { 2976 /* determine max buffer needed and allocate it */ 2977 maxnz = 0; 2978 for (i=0; i<size; i++) { 2979 maxnz = PetscMax(maxnz,procsnz[i]); 2980 } 2981 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2982 2983 /* read in my part of the matrix column indices */ 2984 nz = procsnz[0]; 2985 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2986 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2987 2988 /* read in every one elses and ship off */ 2989 for (i=1; i<size; i++) { 2990 nz = procsnz[i]; 2991 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2992 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2993 } 2994 ierr = PetscFree(cols);CHKERRQ(ierr); 2995 } else { 2996 /* determine buffer space needed for message */ 2997 nz = 0; 2998 for (i=0; i<m; i++) { 2999 nz += ourlens[i]; 3000 } 3001 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3002 3003 /* receive message of column indices*/ 3004 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3005 } 3006 3007 /* 
determine column ownership if matrix is not square */ 3008 if (N != M) { 3009 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3010 else n = newMat->cmap->n; 3011 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3012 cstart = cend - n; 3013 } else { 3014 cstart = rstart; 3015 cend = rend; 3016 n = cend - cstart; 3017 } 3018 3019 /* loop over local rows, determining number of off diagonal entries */ 3020 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3021 jj = 0; 3022 for (i=0; i<m; i++) { 3023 for (j=0; j<ourlens[i]; j++) { 3024 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3025 jj++; 3026 } 3027 } 3028 3029 for (i=0; i<m; i++) { 3030 ourlens[i] -= offlens[i]; 3031 } 3032 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3033 3034 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3035 3036 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3037 3038 for (i=0; i<m; i++) { 3039 ourlens[i] += offlens[i]; 3040 } 3041 3042 if (!rank) { 3043 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3044 3045 /* read in my part of the matrix numerical values */ 3046 nz = procsnz[0]; 3047 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3048 3049 /* insert into matrix */ 3050 jj = rstart; 3051 smycols = mycols; 3052 svals = vals; 3053 for (i=0; i<m; i++) { 3054 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3055 smycols += ourlens[i]; 3056 svals += ourlens[i]; 3057 jj++; 3058 } 3059 3060 /* read in other processors and ship out */ 3061 for (i=1; i<size; i++) { 3062 nz = procsnz[i]; 3063 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3064 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3065 } 3066 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3067 } else { 3068 /* receive numeric values */ 3069 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3070 3071 /* receive message of values*/ 3072 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3073 3074 /* insert into matrix */ 3075 jj = rstart; 3076 smycols = mycols; 3077 svals = vals; 3078 for (i=0; i<m; i++) { 3079 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3080 smycols += ourlens[i]; 3081 svals += ourlens[i]; 3082 jj++; 3083 } 3084 } 3085 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3086 ierr = PetscFree(vals);CHKERRQ(ierr); 3087 ierr = PetscFree(mycols);CHKERRQ(ierr); 3088 ierr = PetscFree(rowners);CHKERRQ(ierr); 3089 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3090 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3091 PetscFunctionReturn(0); 3092 } 3093 3094 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3095 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3096 { 3097 PetscErrorCode ierr; 3098 IS iscol_local; 3099 PetscBool isstride; 3100 PetscMPIInt lisstride=0,gisstride; 3101 3102 PetscFunctionBegin; 3103 /* check if we are grabbing all columns*/ 3104 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3105 3106 if (isstride) { 3107 PetscInt start,len,mstart,mlen; 3108 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3109 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3110 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3111 if (mstart == start && mlen-mstart == len) lisstride = 1; 3112 } 3113 3114 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3115 if (gisstride) { 3116 PetscInt N; 3117 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3118 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3119 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3120 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3121 } else { 3122 PetscInt cbs; 3123 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3124 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3125 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3126 } 3127 3128 *isseq = iscol_local; 3129 PetscFunctionReturn(0); 3130 } 3131 3132 /* 3133 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3134 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3135 3136 Input Parameters: 3137 mat - matrix 3138 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3139 i.e., mat->rstart <= isrow[i] < mat->rend 3140 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3141 i.e., mat->cstart <= iscol[i] < mat->cend 3142 Output Parameter: 3143 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3144 iscol_o - sequential column index set for retrieving mat->B 3145 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3146 */ 3147 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3148 { 3149 PetscErrorCode ierr; 3150 Vec x,cmap; 3151 const PetscInt *is_idx; 3152 PetscScalar *xarray,*cmaparray; 3153 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3154 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3155 Mat B=a->B; 3156 Vec lvec=a->lvec,lcmap; 3157 PetscInt i,cstart,cend,Bn=B->cmap->N; 3158 MPI_Comm comm; 3159 VecScatter Mvctx=a->Mvctx; 3160 3161 PetscFunctionBegin; 3162 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3163 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3164 3165 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3166 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3167 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3168 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3169 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3170 3171 /* Get start indices */ 3172 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3173 isstart -= ncols; 3174 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3175 3176 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3177 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3178 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3179 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3180 for (i=0; i<ncols; i++) { 3181 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3182 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3183 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3184 } 3185 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3186 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3187 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3188 3189 /* Get iscol_d */ 3190 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3191 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3192 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3193 3194 /* Get isrow_d */ 3195 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3196 rstart = mat->rmap->rstart; 3197 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3198 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3199 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3200 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3201 3202 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3203 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3204 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3205 3206 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3207 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3208 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3209 3210 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3211 3212 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3213 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3214 3215 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3216 /* off-process column indices */ 3217 count = 0; 3218 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3219 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3220 3221 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3222 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3223 for (i=0; i<Bn; i++) { 3224 if (PetscRealPart(xarray[i]) > -1.0) { 3225 idx[count] = i; /* local column index in off-diagonal part B */ 3226 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3227 count++; 3228 } 3229 } 3230 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3231 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3232 3233 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3234 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3235 3236 ierr = PetscFree(idx);CHKERRQ(ierr); 3237 *garray = cmap1; 3238 3239 ierr = VecDestroy(&x);CHKERRQ(ierr); 3240 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3241 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3242 PetscFunctionReturn(0); 3243 } 3244 3245 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3246 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3247 { 3248 PetscErrorCode ierr; 3249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3250 Mat M = NULL; 3251 MPI_Comm comm; 3252 IS iscol_d,isrow_d,iscol_o; 3253 Mat Asub = NULL,Bsub = NULL; 3254 PetscInt n; 3255 3256 PetscFunctionBegin; 3257 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3258 3259 if (call == MAT_REUSE_MATRIX) { 3260 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3261 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3262 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3263 3264 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3265 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3266 3267 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3268 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3269 3270 /* Update diagonal and off-diagonal portions of submat */ 3271 asub = (Mat_MPIAIJ*)(*submat)->data; 3272 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3273 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3274 if (n) { 3275 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3276 } 3277 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3278 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3279 3280 } else { /* call == MAT_INITIAL_MATRIX) */ 3281 const PetscInt *garray; 3282 PetscInt BsubN; 3283 3284 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3285 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3286 3287 /* Create local submatrices Asub and Bsub */ 3288 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3289 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3290 3291 /* Create submatrix M */ 3292 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3293 3294 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3295 asub = (Mat_MPIAIJ*)M->data; 3296 3297 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3298 n = asub->B->cmap->N; 3299 if (BsubN > n) { 3300 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3301 const PetscInt *idx; 3302 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3303 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3304 3305 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3306 j = 0; 3307 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3308 for (i=0; i<n; i++) { 3309 if (j >= BsubN) break; 3310 while (subgarray[i] > garray[j]) j++; 3311 3312 if (subgarray[i] == garray[j]) { 3313 idx_new[i] = idx[j++]; 3314 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3315 } 3316 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3317 3318 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3319 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3320 3321 } else if (BsubN < n) { 3322 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3323 } 3324 3325 ierr = PetscFree(garray);CHKERRQ(ierr); 3326 *submat = M; 3327 3328 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3329 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3330 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3331 3332 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3333 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3334 3335 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3336 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3337 } 3338 PetscFunctionReturn(0); 3339 } 3340 3341 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3342 { 3343 PetscErrorCode ierr; 3344 IS iscol_local=NULL,isrow_d; 3345 PetscInt csize; 3346 PetscInt n,i,j,start,end; 3347 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3348 MPI_Comm comm; 3349 3350 PetscFunctionBegin; 3351 /* If isrow has same processor distribution as mat, 3352 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3353 if (call == MAT_REUSE_MATRIX) { 3354 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3355 if (isrow_d) { 3356 sameRowDist = PETSC_TRUE; 3357 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3358 } else { 3359 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3360 if (iscol_local) { 3361 sameRowDist = PETSC_TRUE; 3362 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3363 } 3364 } 3365 } else { 3366 /* Check if isrow has same processor distribution as mat */ 3367 sameDist[0] 
= PETSC_FALSE; 3368 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3369 if (!n) { 3370 sameDist[0] = PETSC_TRUE; 3371 } else { 3372 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3373 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3374 if (i >= start && j < end) { 3375 sameDist[0] = PETSC_TRUE; 3376 } 3377 } 3378 3379 /* Check if iscol has same processor distribution as mat */ 3380 sameDist[1] = PETSC_FALSE; 3381 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3382 if (!n) { 3383 sameDist[1] = PETSC_TRUE; 3384 } else { 3385 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3386 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3387 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3388 } 3389 3390 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3391 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3392 sameRowDist = tsameDist[0]; 3393 } 3394 3395 if (sameRowDist) { 3396 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3397 /* isrow and iscol have same processor distribution as mat */ 3398 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3399 PetscFunctionReturn(0); 3400 } else { /* sameRowDist */ 3401 /* isrow has same processor distribution as mat */ 3402 if (call == MAT_INITIAL_MATRIX) { 3403 PetscBool sorted; 3404 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3405 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3406 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3407 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3408 3409 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3410 if (sorted) { 3411 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3412 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3413 PetscFunctionReturn(0); 3414 } 3415 } else { /* call == MAT_REUSE_MATRIX */ 3416 IS iscol_sub; 3417 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3418 if (iscol_sub) { 3419 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3420 PetscFunctionReturn(0); 3421 } 3422 } 3423 } 3424 } 3425 3426 /* General case: iscol -> iscol_local which has global size of iscol */ 3427 if (call == MAT_REUSE_MATRIX) { 3428 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3429 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3430 } else { 3431 if (!iscol_local) { 3432 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3433 } 3434 } 3435 3436 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3437 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3438 3439 if (call == MAT_INITIAL_MATRIX) { 3440 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3441 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3442 } 3443 PetscFunctionReturn(0); 3444 } 3445 3446 /*@C 3447 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3448 and "off-diagonal" part of the matrix in CSR format. 3449 3450 Collective on MPI_Comm 3451 3452 Input Parameters: 3453 + comm - MPI communicator 3454 . 
A - "diagonal" portion of matrix 3455 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3456 - garray - global index of B columns 3457 3458 Output Parameter: 3459 . mat - the matrix, with input A as its local diagonal matrix 3460 Level: advanced 3461 3462 Notes: 3463 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3464 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3465 3466 .seealso: MatCreateMPIAIJWithSplitArrays() 3467 @*/ 3468 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3469 { 3470 PetscErrorCode ierr; 3471 Mat_MPIAIJ *maij; 3472 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3473 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3474 PetscScalar *oa=b->a; 3475 Mat Bnew; 3476 PetscInt m,n,N; 3477 3478 PetscFunctionBegin; 3479 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3480 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3481 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3482 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3483 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3484 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3485 3486 /* Get global columns of mat */ 3487 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3488 3489 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3490 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3491 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3492 maij = (Mat_MPIAIJ*)(*mat)->data; 3493 3494 (*mat)->preallocated = PETSC_TRUE; 3495 3496 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3497 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3498 3499 /* Set A as diagonal portion of *mat */ 3500 maij->A = A; 3501 3502 nz = oi[m]; 3503 for (i=0; i<nz; i++) { 3504 col = oj[i]; 3505 oj[i] = garray[col]; 3506 } 3507 3508 /* Set Bnew as off-diagonal portion of *mat */ 3509 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3510 bnew = (Mat_SeqAIJ*)Bnew->data; 3511 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3512 maij->B = Bnew; 3513 3514 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3515 3516 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3517 b->free_a = PETSC_FALSE; 3518 b->free_ij = PETSC_FALSE; 3519 ierr = MatDestroy(&B);CHKERRQ(ierr); 3520 3521 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3522 bnew->free_a = PETSC_TRUE; 3523 bnew->free_ij = PETSC_TRUE; 3524 3525 /* condense columns of maij->B */ 3526 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3527 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3528 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3529 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3530 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3531 PetscFunctionReturn(0); 3532 } 3533 3534 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3535 
3536 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3537 { 3538 PetscErrorCode ierr; 3539 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3540 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3541 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3542 Mat M,Msub,B=a->B; 3543 MatScalar *aa; 3544 Mat_SeqAIJ *aij; 3545 PetscInt *garray = a->garray,*colsub,Ncols; 3546 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3547 IS iscol_sub,iscmap; 3548 const PetscInt *is_idx,*cmap; 3549 PetscBool allcolumns=PETSC_FALSE; 3550 MPI_Comm comm; 3551 3552 PetscFunctionBegin; 3553 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3554 3555 if (call == MAT_REUSE_MATRIX) { 3556 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3557 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3558 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3559 3560 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3561 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3562 3563 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3564 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3565 3566 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3567 3568 } else { /* call == MAT_INITIAL_MATRIX) */ 3569 PetscBool flg; 3570 3571 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3572 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3573 3574 /* (1) iscol -> nonscalable iscol_local */ 3575 /* Check for special case: each processor gets entire matrix columns */ 3576 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3577 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3578 if (allcolumns) { 3579 iscol_sub = iscol_local; 3580 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3581 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3582 3583 } else { 3584 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3585 PetscInt *idx,*cmap1,k; 3586 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3587 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3588 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3589 count = 0; 3590 k = 0; 3591 for (i=0; i<Ncols; i++) { 3592 j = is_idx[i]; 3593 if (j >= cstart && j < cend) { 3594 /* diagonal part of mat */ 3595 idx[count] = j; 3596 cmap1[count++] = i; /* column index in submat */ 3597 } else if (Bn) { 3598 /* off-diagonal part of mat */ 3599 if (j == garray[k]) { 3600 idx[count] = j; 3601 cmap1[count++] = i; /* column index in submat */ 3602 } else if (j > garray[k]) { 3603 while (j > garray[k] && k < Bn-1) k++; 3604 if (j == garray[k]) { 3605 idx[count] = j; 3606 cmap1[count++] = i; /* column index in submat */ 3607 } 3608 } 3609 } 3610 } 3611 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3612 3613 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3614 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3615 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3616 3617 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3618 } 3619 3620 /* (3) Create sequential Msub */ 3621 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3622 } 3623 3624 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3625 aij = (Mat_SeqAIJ*)(Msub)->data; 3626 ii = aij->i; 3627 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3628 3629 /* 3630 m - number of local rows 3631 Ncols - number of columns (same on all processors) 3632 rstart - first row in new global matrix generated 3633 */ 3634 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3635 3636 if (call == MAT_INITIAL_MATRIX) { 3637 /* (4) Create parallel newmat */ 3638 PetscMPIInt rank,size; 3639 PetscInt csize; 3640 3641 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3642 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3643 3644 /* 3645 Determine the number of non-zeros in the diagonal and off-diagonal 3646 portions of the matrix in order to do correct preallocation 3647 */ 3648 3649 /* first get start and end of "diagonal" columns */ 3650 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3651 if (csize == PETSC_DECIDE) { 3652 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3653 if (mglobal == Ncols) { /* square matrix */ 3654 nlocal = m; 3655 } else { 3656 nlocal = Ncols/size + ((Ncols % size) > rank); 3657 } 3658 } else { 3659 nlocal = csize; 3660 } 3661 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3662 rstart = rend - nlocal; 3663 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3664 3665 /* next, compute all the lengths */ 3666 jj = aij->j; 3667 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3668 olens = dlens + m; 3669 for (i=0; i<m; i++) { 3670 jend = ii[i+1] - ii[i]; 3671 olen = 0; 3672 dlen = 0; 3673 for (j=0; j<jend; j++) { 3674 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3675 else dlen++; 3676 jj++; 3677 } 3678 olens[i] = olen; 3679 dlens[i] = dlen; 3680 } 3681 3682 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3683 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3684 3685 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3686 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
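  /* dlens[]/olens[] computed above give, for each local row of Msub, the number of entries whose
     new global column (obtained through cmap[]) falls inside or outside this process's owned column
     range [rstart,rend), so the MatMPIAIJSetPreallocation() call below is exact */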
3687 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3688 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3689 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3690 ierr = PetscFree(dlens);CHKERRQ(ierr); 3691 3692 } else { /* call == MAT_REUSE_MATRIX */ 3693 M = *newmat; 3694 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3695 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3696 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3697 /* 3698 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3699 rather than the slower MatSetValues(). 3700 */ 3701 M->was_assembled = PETSC_TRUE; 3702 M->assembled = PETSC_FALSE; 3703 } 3704 3705 /* (5) Set values of Msub to *newmat */ 3706 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3707 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3708 3709 jj = aij->j; 3710 aa = aij->a; 3711 for (i=0; i<m; i++) { 3712 row = rstart + i; 3713 nz = ii[i+1] - ii[i]; 3714 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3715 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3716 jj += nz; aa += nz; 3717 } 3718 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3719 3720 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3721 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3722 3723 ierr = PetscFree(colsub);CHKERRQ(ierr); 3724 3725 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3726 if (call == MAT_INITIAL_MATRIX) { 3727 *newmat = M; 3728 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3729 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3730 3731 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3732 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3733 3734 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3735 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3736 3737 if (iscol_local) { 3738 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3739 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3740 } 3741 } 3742 PetscFunctionReturn(0); 3743 } 3744 3745 /* 3746 Not great since it makes two copies of the submatrix, first an SeqAIJ 3747 in local and then by concatenating the local matrices the end result. 3748 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3749 3750 Note: This requires a sequential iscol with all indices. 
3751 */ 3752 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3753 { 3754 PetscErrorCode ierr; 3755 PetscMPIInt rank,size; 3756 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3757 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3758 Mat M,Mreuse; 3759 MatScalar *aa,*vwork; 3760 MPI_Comm comm; 3761 Mat_SeqAIJ *aij; 3762 PetscBool colflag,allcolumns=PETSC_FALSE; 3763 3764 PetscFunctionBegin; 3765 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3766 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3767 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3768 3769 /* Check for special case: each processor gets entire matrix columns */ 3770 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3771 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3772 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3773 3774 if (call == MAT_REUSE_MATRIX) { 3775 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3776 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3777 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3778 } else { 3779 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3780 } 3781 3782 /* 3783 m - number of local rows 3784 n - number of columns (same on all processors) 3785 rstart - first row in new global matrix generated 3786 */ 3787 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3788 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3789 if (call == MAT_INITIAL_MATRIX) { 3790 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3791 ii = aij->i; 3792 jj = aij->j; 3793 3794 /* 3795 Determine the number of non-zeros in the diagonal and off-diagonal 3796 portions of the matrix in order to do correct preallocation 3797 */ 3798 3799 /* first get start and end of "diagonal" columns */ 3800 if (csize == PETSC_DECIDE) { 3801 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3802 if (mglobal == n) { /* square matrix */ 3803 nlocal = m; 3804 } else { 3805 nlocal = n/size + ((n % size) > rank); 3806 } 3807 } else { 3808 nlocal = csize; 3809 } 3810 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3811 rstart = rend - nlocal; 3812 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3813 3814 /* next, compute all the lengths */ 3815 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3816 olens = dlens + m; 3817 for (i=0; i<m; i++) { 3818 jend = ii[i+1] - ii[i]; 3819 olen = 0; 3820 dlen = 0; 3821 for (j=0; j<jend; j++) { 3822 if (*jj < rstart || *jj >= rend) olen++; 3823 else dlen++; 3824 jj++; 3825 } 3826 olens[i] = olen; 3827 dlens[i] = dlen; 3828 } 3829 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3830 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3831 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3832 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3833 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3834 ierr = PetscFree(dlens);CHKERRQ(ierr); 3835 } else { 3836 PetscInt ml,nl; 3837 3838 M = *newmat; 3839 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3840 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3841 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3842 /* 3843 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3844 rather than the slower MatSetValues(). 3845 */ 3846 M->was_assembled = PETSC_TRUE; 3847 M->assembled = PETSC_FALSE; 3848 } 3849 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3850 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3851 ii = aij->i; 3852 jj = aij->j; 3853 aa = aij->a; 3854 for (i=0; i<m; i++) { 3855 row = rstart + i; 3856 nz = ii[i+1] - ii[i]; 3857 cwork = jj; jj += nz; 3858 vwork = aa; aa += nz; 3859 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3860 } 3861 3862 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 *newmat = M; 3865 3866 /* save submatrix used in processor for next request */ 3867 if (call == MAT_INITIAL_MATRIX) { 3868 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3869 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3870 } 3871 PetscFunctionReturn(0); 3872 } 3873 3874 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3875 { 3876 PetscInt m,cstart, cend,j,nnz,i,d; 3877 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3878 const PetscInt *JJ; 3879 PetscScalar *values; 3880 PetscErrorCode ierr; 3881 PetscBool nooffprocentries; 3882 3883 PetscFunctionBegin; 3884 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3885 3886 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3887 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3888 m = B->rmap->n; 3889 cstart = B->cmap->rstart; 3890 cend = B->cmap->rend; 3891 rstart = B->rmap->rstart; 3892 3893 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3894 3895 #if defined(PETSC_USE_DEBUG) 3896 for (i=0; i<m; i++) { 3897 nnz = Ii[i+1]- Ii[i]; 3898 JJ = J + Ii[i]; 3899 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3900 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3901 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3902 } 3903 #endif 3904 3905 for (i=0; i<m; i++) { 3906 nnz = Ii[i+1]- Ii[i]; 3907 JJ = J + Ii[i]; 3908 nnz_max = PetscMax(nnz_max,nnz); 3909 d = 0; 3910 for (j=0; j<nnz; j++) { 3911 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3912 } 3913 d_nnz[i] = d; 3914 o_nnz[i] = nnz - d; 3915 } 3916 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3917 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3918 3919 if (v) values = (PetscScalar*)v; 3920 else { 3921 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3922 } 3923 3924 for (i=0; i<m; i++) { 3925 ii = i + rstart; 3926 nnz = Ii[i+1]- Ii[i]; 3927 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3928 } 3929 nooffprocentries = B->nooffprocentries; 3930 B->nooffprocentries = PETSC_TRUE; 3931 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 B->nooffprocentries = nooffprocentries; 3934 3935 if (!v) { 3936 ierr = PetscFree(values);CHKERRQ(ierr); 3937 } 3938 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3939 PetscFunctionReturn(0); 3940 } 3941 3942 /*@ 3943 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3944 (the default parallel PETSc format). 3945 3946 Collective on MPI_Comm 3947 3948 Input Parameters: 3949 + B - the matrix 3950 . i - the indices into j for the start of each local row (starts with zero) 3951 . j - the column indices for each local row (starts with zero) 3952 - v - optional values in the matrix 3953 3954 Level: developer 3955 3956 Notes: 3957 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3958 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3959 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3960 3961 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3962 3963 The format which is used for the sparse matrix input, is equivalent to a 3964 row-major ordering.. i.e for the following matrix, the input data expected is 3965 as shown 3966 3967 $ 1 0 0 3968 $ 2 0 3 P0 3969 $ ------- 3970 $ 4 5 6 P1 3971 $ 3972 $ Process0 [P0]: rows_owned=[0,1] 3973 $ i = {0,1,3} [size = nrow+1 = 2+1] 3974 $ j = {0,0,2} [size = 3] 3975 $ v = {1,2,3} [size = 3] 3976 $ 3977 $ Process1 [P1]: rows_owned=[2] 3978 $ i = {0,3} [size = nrow+1 = 1+1] 3979 $ j = {0,1,2} [size = 3] 3980 $ v = {4,5,6} [size = 3] 3981 3982 .keywords: matrix, aij, compressed row, sparse, parallel 3983 3984 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3985 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3986 @*/ 3987 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3988 { 3989 PetscErrorCode ierr; 3990 3991 PetscFunctionBegin; 3992 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3993 PetscFunctionReturn(0); 3994 } 3995 3996 /*@C 3997 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3998 (the default parallel PETSc format). For good matrix assembly performance 3999 the user should preallocate the matrix storage by setting the parameters 4000 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4001 performance can be increased by more than a factor of 50. 4002 4003 Collective on MPI_Comm 4004 4005 Input Parameters: 4006 + B - the matrix 4007 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4008 (same value is used for all local rows) 4009 . d_nnz - array containing the number of nonzeros in the various rows of the 4010 DIAGONAL portion of the local submatrix (possibly different for each row) 4011 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4012 The size of this array is equal to the number of local rows, i.e 'm'. 
4013 For matrices that will be factored, you must leave room for (and set) 4014 the diagonal entry even if it is zero. 4015 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4016 submatrix (same value is used for all local rows). 4017 - o_nnz - array containing the number of nonzeros in the various rows of the 4018 OFF-DIAGONAL portion of the local submatrix (possibly different for 4019 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4020 structure. The size of this array is equal to the number 4021 of local rows, i.e. 'm'. 4022 4023 If the *_nnz parameter is given then the *_nz parameter is ignored. 4024 4025 The AIJ format (also called the Yale sparse matrix format or 4026 compressed row storage (CSR)) is fully compatible with standard Fortran 77 4027 storage. The stored row and column indices begin with zero. 4028 See Users-Manual: ch_mat for details. 4029 4030 The parallel matrix is partitioned such that the first m0 rows belong to 4031 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4032 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 4033 4034 The DIAGONAL portion of the local submatrix of a processor can be defined 4035 as the submatrix which is obtained by extracting the part corresponding to 4036 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4037 first row that belongs to the processor, r2 is the last row belonging to 4038 this processor, and c1-c2 is the range of indices of the local part of a 4039 vector suitable for applying the matrix to. This is an mxn matrix. In the 4040 common case of a square matrix, the row and column ranges are the same and 4041 the DIAGONAL part is also square. The remaining portion of the local 4042 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4043 4044 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4045 4046 You can call MatGetInfo() to get information on how effective the preallocation was; 4047 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 4048 You can also run with the option -info and look for messages with the string 4049 malloc in them to see if additional memory allocation was needed. 4050 4051 Example usage: 4052 4053 Consider the following 8x8 matrix with 34 non-zero values, that is 4054 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4055 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4056 as follows: 4057 4058 .vb 4059 1 2 0 | 0 3 0 | 0 4 4060 Proc0 0 5 6 | 7 0 0 | 8 0 4061 9 0 10 | 11 0 0 | 12 0 4062 ------------------------------------- 4063 13 0 14 | 15 16 17 | 0 0 4064 Proc1 0 18 0 | 19 20 21 | 0 0 4065 0 0 0 | 22 23 0 | 24 0 4066 ------------------------------------- 4067 Proc2 25 26 27 | 0 0 28 | 29 0 4068 30 0 0 | 31 32 33 | 0 34 4069 .ve 4070 4071 This can be represented as a collection of submatrices as: 4072 4073 .vb 4074 A B C 4075 D E F 4076 G H I 4077 .ve 4078 4079 Where the submatrices A,B,C are owned by proc0, D,E,F are 4080 owned by proc1, G,H,I are owned by proc2. 4081 4082 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4083 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4084 The 'M','N' parameters are 8,8, and have the same values on all procs. 4085 4086 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4087 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4088 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
4089 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4090 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4091 matrix, and [DF] as another SeqAIJ matrix. 4092 4093 When d_nz, o_nz parameters are specified, d_nz storage elements are 4094 allocated for every row of the local diagonal submatrix, and o_nz 4095 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4096 One way to choose d_nz and o_nz is to use the max nonzeros per local 4097 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4098 In this case, the values of d_nz,o_nz are: 4099 .vb 4100 proc0 : dnz = 2, o_nz = 2 4101 proc1 : dnz = 3, o_nz = 2 4102 proc2 : dnz = 1, o_nz = 4 4103 .ve 4104 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4105 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4106 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4107 34 values. 4108 4109 When d_nnz, o_nnz parameters are specified, the storage is specified 4110 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4111 In the above case the values for d_nnz,o_nnz are: 4112 .vb 4113 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4114 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4115 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4116 .ve 4117 Here the space allocated is the sum of all the above values, i.e. 34, and 4118 hence pre-allocation is perfect. 4119 4120 Level: intermediate 4121 4122 .keywords: matrix, aij, compressed row, sparse, parallel 4123 4124 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4125 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4126 @*/ 4127 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4128 { 4129 PetscErrorCode ierr; 4130 4131 PetscFunctionBegin; 4132 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4133 PetscValidType(B,1); 4134 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4135 PetscFunctionReturn(0); 4136 } 4137 4138 /*@ 4139 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4140 CSR format. 4141 4142 Collective on MPI_Comm 4143 4144 Input Parameters: 4145 + comm - MPI communicator 4146 . m - number of local rows (Cannot be PETSC_DECIDE) 4147 . n - This value should be the same as the local size used in creating the 4148 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4149 calculated if N is given) For square matrices n is almost always m. 4150 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4151 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4152 . i - the indices into j for the start of each local row (starts with zero) 4153 . j - the column indices for each local row (starts with zero) 4154 - a - the matrix values 4155 4156 Output Parameter: 4157 . mat - the matrix 4158 4159 Level: intermediate 4160 4161 Notes: 4162 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4163 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4164 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4165 4166 The i and j indices are 0 based, and the i indices are indices corresponding to the local j array. 
4167 4168 The format which is used for the sparse matrix input, is equivalent to a 4169 row-major ordering.. i.e for the following matrix, the input data expected is 4170 as shown 4171 4172 $ 1 0 0 4173 $ 2 0 3 P0 4174 $ ------- 4175 $ 4 5 6 P1 4176 $ 4177 $ Process0 [P0]: rows_owned=[0,1] 4178 $ i = {0,1,3} [size = nrow+1 = 2+1] 4179 $ j = {0,0,2} [size = 3] 4180 $ v = {1,2,3} [size = 3] 4181 $ 4182 $ Process1 [P1]: rows_owned=[2] 4183 $ i = {0,3} [size = nrow+1 = 1+1] 4184 $ j = {0,1,2} [size = 3] 4185 $ v = {4,5,6} [size = 3] 4186 4187 .keywords: matrix, aij, compressed row, sparse, parallel 4188 4189 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4190 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4191 @*/ 4192 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4193 { 4194 PetscErrorCode ierr; 4195 4196 PetscFunctionBegin; 4197 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4198 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4199 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4200 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4201 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4202 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4203 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4204 PetscFunctionReturn(0); 4205 } 4206 4207 /*@C 4208 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4209 (the default parallel PETSc format). For good matrix assembly performance 4210 the user should preallocate the matrix storage by setting the parameters 4211 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4212 performance can be increased by more than a factor of 50. 4213 4214 Collective on MPI_Comm 4215 4216 Input Parameters: 4217 + comm - MPI communicator 4218 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4219 This value should be the same as the local size used in creating the 4220 y vector for the matrix-vector product y = Ax. 4221 . n - This value should be the same as the local size used in creating the 4222 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4223 calculated if N is given) For square matrices n is almost always m. 4224 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4225 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4226 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4227 (same value is used for all local rows) 4228 . d_nnz - array containing the number of nonzeros in the various rows of the 4229 DIAGONAL portion of the local submatrix (possibly different for each row) 4230 or NULL, if d_nz is used to specify the nonzero structure. 4231 The size of this array is equal to the number of local rows, i.e 'm'. 4232 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4233 submatrix (same value is used for all local rows). 4234 - o_nnz - array containing the number of nonzeros in the various rows of the 4235 OFF-DIAGONAL portion of the local submatrix (possibly different for 4236 each row) or NULL, if o_nz is used to specify the nonzero 4237 structure. 
The size of this array is equal to the number 4238 of local rows, i.e. 'm'. 4239 4240 Output Parameter: 4241 . A - the matrix 4242 4243 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4244 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4245 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4246 4247 Notes: 4248 If the *_nnz parameter is given then the *_nz parameter is ignored. 4249 4250 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4251 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4252 storage requirements for this matrix. 4253 4254 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4255 processor then it must be used on all processors that share the object for 4256 that argument. 4257 4258 The user MUST specify either the local or global matrix dimensions 4259 (possibly both). 4260 4261 The parallel matrix is partitioned across processors such that the 4262 first m0 rows belong to process 0, the next m1 rows belong to 4263 process 1, the next m2 rows belong to process 2 etc., where 4264 m0,m1,m2,.. are the input parameter 'm'. That is, each processor stores 4265 values corresponding to an [m x N] submatrix. 4266 4267 The columns are logically partitioned with the n0 columns belonging 4268 to the 0th partition, the next n1 columns belonging to the next 4269 partition etc., where n0,n1,n2... are the input parameter 'n'. 4270 4271 The DIAGONAL portion of the local submatrix on any given processor 4272 is the submatrix corresponding to the rows and columns m,n 4273 corresponding to the given processor. That is, the diagonal matrix on 4274 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4275 etc. The remaining portion of the local submatrix [m x (N-n)] 4276 constitutes the OFF-DIAGONAL portion. The example below better 4277 illustrates this concept. 4278 4279 For a square global matrix we define each processor's diagonal portion 4280 to be its local rows and the corresponding columns (a square submatrix); 4281 each processor's off-diagonal portion encompasses the remainder of the 4282 local matrix (a rectangular submatrix). 4283 4284 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4285 4286 When calling this routine with a single process communicator, a matrix of 4287 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4288 type of communicator, use the construction mechanism 4289 .vb 4290 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4291 .ve 4292 4293 $ MatCreate(...,&A); 4294 $ MatSetType(A,MATMPIAIJ); 4295 $ MatSetSizes(A, m,n,M,N); 4296 $ MatMPIAIJSetPreallocation(A,...); 4297 4298 By default, this format uses inodes (identical nodes) when possible. 4299 We search for consecutive rows with the same nonzero structure, thereby 4300 reusing matrix information to achieve increased efficiency. 4301 4302 Options Database Keys: 4303 + -mat_no_inode - Do not use inodes 4304 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4305 4306 4307 4308 Example usage: 4309 4310 Consider the following 8x8 matrix with 34 non-zero values, that is 4311 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4312 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4313 as follows 4314 4315 .vb 4316 1 2 0 | 0 3 0 | 0 4 4317 Proc0 0 5 6 | 7 0 0 | 8 0 4318 9 0 10 | 11 0 0 | 12 0 4319 ------------------------------------- 4320 13 0 14 | 15 16 17 | 0 0 4321 Proc1 0 18 0 | 19 20 21 | 0 0 4322 0 0 0 | 22 23 0 | 24 0 4323 ------------------------------------- 4324 Proc2 25 26 27 | 0 0 28 | 29 0 4325 30 0 0 | 31 32 33 | 0 34 4326 .ve 4327 4328 This can be represented as a collection of submatrices as 4329 4330 .vb 4331 A B C 4332 D E F 4333 G H I 4334 .ve 4335 4336 Where the submatrices A,B,C are owned by proc0, D,E,F are 4337 owned by proc1, G,H,I are owned by proc2. 4338 4339 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4340 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4341 The 'M','N' parameters are 8,8, and have the same values on all procs. 4342 4343 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4344 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4345 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4346 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4347 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4348 matrix, and [DF] as another SeqAIJ matrix. 4349 4350 When d_nz, o_nz parameters are specified, d_nz storage elements are 4351 allocated for every row of the local diagonal submatrix, and o_nz 4352 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4353 One way to choose d_nz and o_nz is to use the max nonzeros per local 4354 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4355 In this case, the values of d_nz,o_nz are 4356 .vb 4357 proc0 : dnz = 2, o_nz = 2 4358 proc1 : dnz = 3, o_nz = 2 4359 proc2 : dnz = 1, o_nz = 4 4360 .ve 4361 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4362 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4363 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4364 34 values. 4365 4366 When d_nnz, o_nnz parameters are specified, the storage is specified 4367 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4368 In the above case the values for d_nnz,o_nnz are 4369 .vb 4370 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4371 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4372 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4373 .ve 4374 Here the space allocated is the sum of all the above values, i.e. 34, and 4375 hence pre-allocation is perfect. 
4376 4377 Level: intermediate 4378 4379 .keywords: matrix, aij, compressed row, sparse, parallel 4380 4381 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4382 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4383 @*/ 4384 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4385 { 4386 PetscErrorCode ierr; 4387 PetscMPIInt size; 4388 4389 PetscFunctionBegin; 4390 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4391 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4392 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4393 if (size > 1) { 4394 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4395 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4396 } else { 4397 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4398 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4399 } 4400 PetscFunctionReturn(0); 4401 } 4402 4403 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4404 { 4405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4406 PetscBool flg; 4407 PetscErrorCode ierr; 4408 4409 PetscFunctionBegin; 4410 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4411 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4412 if (Ad) *Ad = a->A; 4413 if (Ao) *Ao = a->B; 4414 if (colmap) *colmap = a->garray; 4415 PetscFunctionReturn(0); 4416 } 4417 4418 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4419 { 4420 PetscErrorCode ierr; 4421 PetscInt m,N,i,rstart,nnz,Ii; 4422 PetscInt *indx; 4423 PetscScalar *values; 4424 4425 PetscFunctionBegin; 4426 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4427 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4428 PetscInt *dnz,*onz,sum,bs,cbs; 4429 4430 if (n == PETSC_DECIDE) { 4431 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4432 } 4433 /* Check sum(n) = N */ 4434 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4435 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4436 4437 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4438 rstart -= m; 4439 4440 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4441 for (i=0; i<m; i++) { 4442 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4443 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4444 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4445 } 4446 4447 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4448 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4449 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4450 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4451 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4452 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4453 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4454 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4455 } 4456 4457 /* numeric phase */ 4458 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4459 for (i=0; i<m; i++) { 4460 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4461 Ii = i + rstart; 4462 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
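    /* the values of local row i of inmat were just inserted into global row rstart+i of outmat;
       restore the row before fetching the next one */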
4463 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4464 } 4465 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4466 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4467 PetscFunctionReturn(0); 4468 } 4469 4470 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4471 { 4472 PetscErrorCode ierr; 4473 PetscMPIInt rank; 4474 PetscInt m,N,i,rstart,nnz; 4475 size_t len; 4476 const PetscInt *indx; 4477 PetscViewer out; 4478 char *name; 4479 Mat B; 4480 const PetscScalar *values; 4481 4482 PetscFunctionBegin; 4483 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4484 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4485 /* Should this be the type of the diagonal block of A? */ 4486 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4487 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4488 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4489 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4490 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4491 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4492 for (i=0; i<m; i++) { 4493 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4494 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4495 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4496 } 4497 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4498 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4499 4500 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4501 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4502 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4503 sprintf(name,"%s.%d",outfile,rank); 4504 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4505 ierr = PetscFree(name);CHKERRQ(ierr); 4506 ierr = MatView(B,out);CHKERRQ(ierr); 4507 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4508 ierr = MatDestroy(&B);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4513 { 4514 PetscErrorCode ierr; 4515 Mat_Merge_SeqsToMPI *merge; 4516 PetscContainer container; 4517 4518 PetscFunctionBegin; 4519 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4520 if (container) { 4521 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4522 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4523 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4524 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4534 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4535 ierr = PetscFree(merge);CHKERRQ(ierr); 4536 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4537 } 4538 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4539 PetscFunctionReturn(0); 4540 } 4541 4542 #include <../src/mat/utils/freespace.h> 4543 #include <petscbt.h> 4544 4545 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4546 { 4547 PetscErrorCode ierr; 4548 MPI_Comm comm; 4549 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4550 PetscMPIInt 
size,rank,taga,*len_s;
  PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a;
  MatScalar            **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in the rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr =
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4733 4734 /* post the Irecv of j-structure */ 4735 /*-------------------------------*/ 4736 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4737 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4738 4739 /* post the Isend of j-structure */ 4740 /*--------------------------------*/ 4741 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4742 4743 for (proc=0, k=0; proc<size; proc++) { 4744 if (!len_s[proc]) continue; 4745 i = owners[proc]; 4746 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4747 k++; 4748 } 4749 4750 /* receives and sends of j-structure are complete */ 4751 /*------------------------------------------------*/ 4752 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4753 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4754 4755 /* send and recv i-structure */ 4756 /*---------------------------*/ 4757 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4758 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4759 4760 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4761 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4762 for (proc=0,k=0; proc<size; proc++) { 4763 if (!len_s[proc]) continue; 4764 /* form outgoing message for i-structure: 4765 buf_si[0]: nrows to be sent 4766 [1:nrows]: row index (global) 4767 [nrows+1:2*nrows+1]: i-structure index 4768 */ 4769 /*-------------------------------------------*/ 4770 nrows = len_si[proc]/2 - 1; 4771 buf_si_i = buf_si + nrows+1; 4772 buf_si[0] = nrows; 4773 buf_si_i[0] = 0; 4774 nrows = 0; 4775 for (i=owners[proc]; i<owners[proc+1]; i++) { 4776 anzi = ai[i+1] - ai[i]; 4777 if (anzi) { 4778 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4779 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4780 nrows++; 4781 } 4782 } 4783 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4784 k++; 4785 buf_si += len_si[proc]; 4786 } 4787 4788 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4789 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4790 4791 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4792 for (i=0; i<merge->nrecv; i++) { 4793 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4794 } 4795 4796 ierr = PetscFree(len_si);CHKERRQ(ierr); 4797 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4798 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4799 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4800 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4801 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4802 ierr = PetscFree(status);CHKERRQ(ierr); 4803 4804 /* compute a local seq matrix in each processor */ 4805 /*----------------------------------------------*/ 4806 /* allocate bi array and free space for accumulating nonzero column info */ 4807 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4808 bi[0] = 0; 4809 4810 /* create and initialize a linked list */ 4811 nlnk = N+1; 4812 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4813 4814 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4815 len = ai[owners[rank+1]] - 
ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
  PetscScalar            *b_otha,*bufa,*bufA,*vals;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  VecScatterType         type;
  PetscBool              mpi1;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (size == 1) {
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  ctx  = a->Mvctx;
  ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
  ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
  if (!mpi1) {
    /* a->Mvctx is not of type MPI1, the only type for which the Mat-Mat ops are implemented,
       thus create a->Mvctx_mpi1 */
    if (!a->Mvctx_mpi1) {
      a->Mvctx_mpi1_flg = PETSC_TRUE;
      ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
    }
    ctx = a->Mvctx_mpi1;
  }
  tag = ((PetscObject)ctx)->tag;

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
    for (i=0; i<nsends; i++) {
      rowlen = svalues + sstarts[i]*sbs;
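      /* rowlen[] lives inside svalues; the loop below records the number of columns of each
         (block) row that will be shipped to process sprocs[i], forming the i-structure message */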
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length
of the msg received */ 5421 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5422 } 5423 5424 /* pack the outgoing message a-array */ 5425 k = 0; 5426 for (i=0; i<nsends; i++) { 5427 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5428 bufA = bufa+sstartsj[i]; 5429 for (j=0; j<nrows; j++) { 5430 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5431 for (ll=0; ll<sbs; ll++) { 5432 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5433 for (l=0; l<ncols; l++) { 5434 *bufA++ = vals[l]; 5435 } 5436 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5437 } 5438 } 5439 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5440 } 5441 /* recvs and sends of a-array are completed */ 5442 i = nrecvs; 5443 while (i--) { 5444 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5445 } 5446 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5447 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5448 5449 if (scall == MAT_INITIAL_MATRIX) { 5450 /* put together the new matrix */ 5451 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5452 5453 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5454 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5455 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5456 b_oth->free_a = PETSC_TRUE; 5457 b_oth->free_ij = PETSC_TRUE; 5458 b_oth->nonew = 0; 5459 5460 ierr = PetscFree(bufj);CHKERRQ(ierr); 5461 if (!startsj_s || !bufa_ptr) { 5462 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5463 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5464 } else { 5465 *startsj_s = sstartsj; 5466 *startsj_r = rstartsj; 5467 *bufa_ptr = bufa; 5468 } 5469 } 5470 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5471 PetscFunctionReturn(0); 5472 } 5473 5474 /*@C 5475 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5476 5477 Not Collective 5478 5479 Input Parameters: 5480 . A - The matrix in mpiaij format 5481 5482 Output Parameter: 5483 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5484 . 
colmap - A map from global column index to local index into lvec 5485 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5486 5487 Level: developer 5488 5489 @*/ 5490 #if defined(PETSC_USE_CTABLE) 5491 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5492 #else 5493 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5494 #endif 5495 { 5496 Mat_MPIAIJ *a; 5497 5498 PetscFunctionBegin; 5499 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5500 PetscValidPointer(lvec, 2); 5501 PetscValidPointer(colmap, 3); 5502 PetscValidPointer(multScatter, 4); 5503 a = (Mat_MPIAIJ*) A->data; 5504 if (lvec) *lvec = a->lvec; 5505 if (colmap) *colmap = a->colmap; 5506 if (multScatter) *multScatter = a->Mvctx; 5507 PetscFunctionReturn(0); 5508 } 5509 5510 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5511 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5512 #if defined(PETSC_HAVE_MKL_SPARSE) 5513 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5514 #endif 5515 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5516 #if defined(PETSC_HAVE_ELEMENTAL) 5517 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5518 #endif 5519 #if defined(PETSC_HAVE_HYPRE) 5520 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5521 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5522 #endif 5523 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5524 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5525 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5526 5527 /* 5528 Computes (B'*A')' since computing B*A directly is untenable 5529 5530 n p p 5531 ( ) ( ) ( ) 5532 m ( A ) * n ( B ) = m ( C ) 5533 ( ) ( ) ( ) 5534 5535 */ 5536 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5537 { 5538 PetscErrorCode ierr; 5539 Mat At,Bt,Ct; 5540 5541 PetscFunctionBegin; 5542 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5543 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5544 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5545 ierr = MatDestroy(&At);CHKERRQ(ierr); 5546 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5547 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5548 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5549 PetscFunctionReturn(0); 5550 } 5551 5552 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5553 { 5554 PetscErrorCode ierr; 5555 PetscInt m=A->rmap->n,n=B->cmap->n; 5556 Mat Cmat; 5557 5558 PetscFunctionBegin; 5559 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5560 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5561 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5562 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5563 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5564 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5565 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5566 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5567 5568 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5569 5570 *C = Cmat; 5571 PetscFunctionReturn(0); 5572 } 5573 5574 /* ----------------------------------------------------------------*/ 5575 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5576 { 5577 PetscErrorCode ierr; 5578 5579 PetscFunctionBegin; 5580 if (scall == MAT_INITIAL_MATRIX) { 5581 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5582 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5583 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5584 } 5585 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5586 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5587 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5588 PetscFunctionReturn(0); 5589 } 5590 5591 /*MC 5592 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5593 5594 Options Database Keys: 5595 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5596 5597 Level: beginner 5598 5599 .seealso: MatCreateAIJ() 5600 M*/ 5601 5602 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5603 { 5604 Mat_MPIAIJ *b; 5605 PetscErrorCode ierr; 5606 PetscMPIInt size; 5607 5608 PetscFunctionBegin; 5609 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5610 5611 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5612 B->data = (void*)b; 5613 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5614 B->assembled = PETSC_FALSE; 5615 B->insertmode = NOT_SET_VALUES; 5616 b->size = size; 5617 5618 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5619 5620 /* build cache for off array entries formed */ 5621 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5622 5623 b->donotstash = PETSC_FALSE; 5624 b->colmap = 0; 5625 b->garray = 0; 5626 b->roworiented = PETSC_TRUE; 5627 5628 /* stuff used for matrix vector multiply */ 5629 b->lvec = NULL; 5630 b->Mvctx = NULL; 5631 5632 /* stuff for MatGetRow() */ 5633 b->rowindices = 0; 5634 b->rowvalues = 0; 5635 b->getrowactive = PETSC_FALSE; 5636 5637 /* flexible pointer used in CUSP/CUSPARSE classes */ 5638 b->spptr = NULL; 5639 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5649 #if defined(PETSC_HAVE_MKL_SPARSE) 5650 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly.
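       For illustration only, here is a hypothetical sketch (the arrays are made up
       for a 4x4 matrix distributed over two processes, each owning two rows, and are
       shown for the second process, whose diagonal block covers global columns 2-3):
.vb
     PetscInt    i[3]  = {0,1,2};   /* diagonal block: one entry per local row */
     PetscInt    j[2]  = {0,1};     /* local column indices within the diagonal block */
     PetscScalar a[2]  = {4.0,4.0};
     PetscInt    oi[3] = {0,1,1};   /* off-diagonal block: one entry in the first local row */
     PetscInt    oj[1] = {0};       /* global column indices */
     PetscScalar oa[1] = {-1.0};
     Mat         A;

     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);
.ve
       Note that the caller must keep all six arrays alive until the matrix is destroyed,
       which is part of what makes this interface cumbersome.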
It is recommended to use MatSetValues() (or a variant thereof) because 5712 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5713 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5714 communication if it is known that only local entries will be set. 5715 5716 .keywords: matrix, aij, compressed row, sparse, parallel 5717 5718 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5719 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5720 @*/ 5721 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5722 { 5723 PetscErrorCode ierr; 5724 Mat_MPIAIJ *maij; 5725 5726 PetscFunctionBegin; 5727 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5728 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5729 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5730 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5731 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5732 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5733 maij = (Mat_MPIAIJ*) (*mat)->data; 5734 5735 (*mat)->preallocated = PETSC_TRUE; 5736 5737 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5738 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5739 5740 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5741 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5742 5743 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5744 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5745 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5747 5748 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5749 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5750 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5752 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5753 PetscFunctionReturn(0); 5754 } 5755 5756 /* 5757 Special version for direct calls from Fortran 5758 */ 5759 #include <petsc/private/fortranimpl.h> 5760 5761 /* Change these macros so can be used in void function */ 5762 #undef CHKERRQ 5763 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5764 #undef SETERRQ2 5765 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5766 #undef SETERRQ3 5767 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5768 #undef SETERRQ 5769 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5770 5771 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5772 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5773 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5774 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5775 #else 5776 #endif 5777 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5778 { 5779 Mat mat = *mmat; 5780 PetscInt m = *mm, n = *mn; 5781 InsertMode addv = 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Redefine these macros so they can be used in this void function: errors abort instead of returning an error code */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {   /* locally owned row: insert directly into A or B */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed from the new B before ap2 is recomputed */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}