1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto ok2; 113 } 114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] = value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 639 */ 640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 641 { 642 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 643 Mat A = aij->A; /* diagonal part of the matrix */ 644 Mat B = aij->B; /* offdiagonal part of the matrix */ 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 646 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 647 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 648 PetscInt *ailen = a->ilen,*aj = a->j; 649 PetscInt *bilen = b->ilen,*bj = b->j; 650 PetscInt am = aij->A->rmap->n,j; 651 PetscInt diag_so_far = 0,dnz; 652 PetscInt offd_so_far = 0,onz; 653 654 PetscFunctionBegin; 655 /* Iterate over all rows of the matrix */ 656 for (j=0; j<am; j++) { 657 dnz = onz = 0; 658 /* Iterate over all non-zero columns of the current row */ 659 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 660 /* If column is in the diagonal */ 661 if (mat_j[col] >= cstart && mat_j[col] < cend) { 662 aj[diag_so_far++] = mat_j[col] - cstart; 663 dnz++; 664 } else { /* off-diagonal entries */ 665 bj[offd_so_far++] = mat_j[col]; 666 onz++; 667 } 668 } 669 ailen[j] = dnz; 670 bilen[j] = onz; 671 } 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 677 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 678 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 679 */ 680 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 Mat A = aij->A; /* diagonal part of the matrix */ 684 Mat B = aij->B; /* offdiagonal part of the matrix */ 685 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 687 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 688 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 689 PetscInt *ailen = a->ilen,*aj = a->j; 690 PetscInt *bilen = b->ilen,*bj = b->j; 691 PetscInt am = aij->A->rmap->n,j; 692 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; 693 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 694 PetscScalar *aa = a->a,*ba = b->a; 695 696 PetscFunctionBegin; 697 /* Iterate over all rows of the matrix */ 698 for (j=0; j<am; j++) { 699 dnz_row = onz_row = 0; 700 rowstart_offd = full_offd_i[j]; 701 rowstart_diag = full_diag_i[j]; 702 /* Iterate over all non-zero columns of the current row */ 703 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 704 /* If column is in the diagonal */ 705 if (mat_j[col] >= cstart && mat_j[col] < cend) { 706 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 707 aa[rowstart_diag+dnz_row] = mat_a[col]; 708 dnz_row++; 709 } else { /* off-diagonal entries */ 710 bj[rowstart_offd+onz_row] = mat_j[col]; 711 ba[rowstart_offd+onz_row] = mat_a[col]; 712 onz_row++; 713 } 714 } 715 ailen[j] = dnz_row; 716 bilen[j] = onz_row; 717 } 718 PetscFunctionReturn(0); 719 } 720 721 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 722 { 723 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 724 PetscErrorCode ierr; 725 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 726 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 727 728 PetscFunctionBegin; 729 for (i=0; i<m; i++) { 730 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 731 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 732 if (idxm[i] >= rstart && idxm[i] < rend) { 733 row = idxm[i] - rstart; 734 for (j=0; j<n; j++) { 735 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 736 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 737 if (idxn[j] >= cstart && idxn[j] < cend) { 738 col = idxn[j] - cstart; 739 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 740 } else { 741 if (!aij->colmap) { 742 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 743 } 744 #if defined(PETSC_USE_CTABLE) 745 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 746 col--; 747 #else 748 col = aij->colmap[idxn[j]] - 1; 749 #endif 750 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 751 else { 752 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 753 } 754 } 755 } 756 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 757 } 758 PetscFunctionReturn(0); 759 } 760 761 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 762 763 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 764 { 765 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 766 PetscErrorCode ierr; 767 PetscInt nstash,reallocs; 768 769 PetscFunctionBegin; 770 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 771 772 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 773 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 774 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 775 PetscFunctionReturn(0); 776 } 777 778 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 779 { 780 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 781 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 782 PetscErrorCode ierr; 783 PetscMPIInt n; 784 PetscInt i,j,rstart,ncols,flg; 785 PetscInt *row,*col; 786 PetscBool other_disassembled; 787 PetscScalar *val; 788 789 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 790 791 PetscFunctionBegin; 792 if (!aij->donotstash && !mat->nooffprocentries) { 793 while (1) { 794 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 795 if (!flg) break; 796 797 for (i=0; i<n; ) { 798 /* Now identify the consecutive vals belonging to the same row */ 799 for (j=i,rstart=row[j]; j<n; j++) { 800 if (row[j] != rstart) break; 801 } 802 if (j < n) ncols = j-i; 803 else ncols = n-i; 804 /* Now assemble all these values with a single function call */ 805 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 806 807 i = j; 808 } 809 } 810 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 811 } 812 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 813 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 814 815 /* determine if any processor has disassembled, if so we must 816 also disassemble ourselfs, in order that we may reassemble. */ 817 /* 818 if nonzero structure of submatrix B cannot change then we know that 819 no processor disassembled thus we can skip this stuff 820 */ 821 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 822 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 823 if (mat->was_assembled && !other_disassembled) { 824 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 825 } 826 } 827 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 828 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 829 } 830 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 831 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 832 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 833 834 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 835 836 aij->rowvalues = 0; 837 838 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 839 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 840 841 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 842 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 843 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 844 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 845 } 846 PetscFunctionReturn(0); 847 } 848 849 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 850 { 851 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 852 PetscErrorCode ierr; 853 854 PetscFunctionBegin; 855 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 856 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 861 { 862 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 863 PetscInt *lrows; 864 PetscInt r, len; 865 PetscBool cong; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 /* get locally owned rows */ 870 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 871 /* fix right hand side if needed */ 872 if (x && b) { 873 const PetscScalar *xx; 874 PetscScalar *bb; 875 876 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 877 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 878 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 879 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 880 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 881 } 882 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 883 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 884 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 885 if ((diag != 0.0) && cong) { 886 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 887 } else if (diag != 0.0) { 888 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 889 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 890 for (r = 0; r < len; ++r) { 891 const PetscInt row = lrows[r] + A->rmap->rstart; 892 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 893 } 894 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 } else { 897 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(lrows);CHKERRQ(ierr); 900 901 /* only change matrix nonzero state if pattern was allowed to be changed */ 902 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 903 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 904 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 905 } 906 PetscFunctionReturn(0); 907 } 908 909 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 910 { 911 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 912 PetscErrorCode ierr; 913 PetscMPIInt n = A->rmap->n; 914 PetscInt i,j,r,m,p = 0,len = 0; 915 PetscInt *lrows,*owners = A->rmap->range; 916 PetscSFNode *rrows; 917 PetscSF sf; 918 const PetscScalar *xx; 919 PetscScalar *bb,*mask; 920 Vec xmask,lmask; 921 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 922 const PetscInt *aj, *ii,*ridx; 923 PetscScalar *aa; 924 925 PetscFunctionBegin; 926 /* Create SF where leaves are input rows and roots are owned rows */ 927 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 928 for (r = 0; r < n; ++r) lrows[r] = -1; 929 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 930 for (r = 0; r < N; ++r) { 931 const PetscInt idx = rows[r]; 932 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 933 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 934 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 935 } 936 rrows[r].rank = p; 937 rrows[r].index = rows[r] - owners[p]; 938 } 939 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 940 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 941 /* Collect flags for rows to be zeroed */ 942 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 943 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 944 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 945 /* Compress and put in row numbers */ 946 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 947 /* zero diagonal part of matrix */ 948 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 949 /* handle off diagonal part of matrix */ 950 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 951 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 952 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 953 for (i=0; i<len; i++) bb[lrows[i]] = 1; 954 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 955 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 956 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 957 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 958 if (x) { 959 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 960 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 962 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 963 } 964 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 965 /* remove zeroed rows of off diagonal matrix */ 966 ii = aij->i; 967 for (i=0; i<len; i++) { 968 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 969 } 970 /* loop over all elements of off process part of matrix zeroing removed columns*/ 971 if (aij->compressedrow.use) { 972 m = aij->compressedrow.nrows; 973 ii = aij->compressedrow.i; 974 ridx = aij->compressedrow.rindex; 975 for (i=0; i<m; i++) { 976 n = ii[i+1] - ii[i]; 977 aj = aij->j + ii[i]; 978 aa = aij->a + ii[i]; 979 980 for (j=0; j<n; j++) { 981 if (PetscAbsScalar(mask[*aj])) { 982 if (b) bb[*ridx] -= *aa*xx[*aj]; 983 *aa = 0.0; 984 } 985 aa++; 986 aj++; 987 } 988 ridx++; 989 } 990 } else { /* do not use compressed row format */ 991 m = l->B->rmap->n; 992 for (i=0; i<m; i++) { 993 n = ii[i+1] - ii[i]; 994 aj = aij->j + ii[i]; 995 aa = aij->a + ii[i]; 996 for (j=0; j<n; j++) { 997 if (PetscAbsScalar(mask[*aj])) { 998 if (b) bb[i] -= *aa*xx[*aj]; 999 *aa = 0.0; 1000 } 1001 aa++; 1002 aj++; 1003 } 1004 } 1005 } 1006 if (x) { 1007 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1008 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1009 } 1010 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1011 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1012 ierr = PetscFree(lrows);CHKERRQ(ierr); 1013 1014 /* only change matrix nonzero state if pattern was allowed to be changed */ 1015 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1016 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1017 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1018 } 1019 PetscFunctionReturn(0); 1020 } 1021 1022 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1023 { 1024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1025 PetscErrorCode ierr; 1026 PetscInt nt; 1027 VecScatter Mvctx = a->Mvctx; 1028 1029 PetscFunctionBegin; 1030 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1031 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1032 1033 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1037 PetscFunctionReturn(0); 1038 } 1039 1040 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1041 { 1042 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1051 { 1052 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1053 PetscErrorCode ierr; 1054 VecScatter Mvctx = a->Mvctx; 1055 1056 PetscFunctionBegin; 1057 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1058 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1059 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1060 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1066 { 1067 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 PetscBool merged; 1070 1071 PetscFunctionBegin; 1072 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1073 /* do nondiagonal part */ 1074 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1075 if (!merged) { 1076 /* send it on its way */ 1077 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1078 /* do local part */ 1079 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1080 /* receive remote parts: note this assumes the values are not actually */ 1081 /* added in yy until the next line, */ 1082 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1083 } else { 1084 /* do local part */ 1085 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1086 /* send it on its way */ 1087 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1088 /* values actually were received in the Begin() but we need to call this nop */ 1089 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 } 1091 PetscFunctionReturn(0); 1092 } 1093 1094 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1095 { 1096 MPI_Comm comm; 1097 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1098 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1099 IS Me,Notme; 1100 PetscErrorCode ierr; 1101 PetscInt M,N,first,last,*notme,i; 1102 PetscMPIInt size; 1103 1104 PetscFunctionBegin; 1105 /* Easy test: symmetric diagonal block */ 1106 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1107 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1108 if (!*f) PetscFunctionReturn(0); 1109 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1111 if (size == 1) PetscFunctionReturn(0); 1112 1113 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1114 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1115 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1116 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1117 for (i=0; i<first; i++) notme[i] = i; 1118 for (i=last; i<M; i++) notme[i-last+first] = i; 1119 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1120 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1121 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1122 Aoff = Aoffs[0]; 1123 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1124 Boff = Boffs[0]; 1125 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1126 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1127 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1128 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1129 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1130 ierr = PetscFree(notme);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1135 { 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1144 { 1145 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 1148 PetscFunctionBegin; 1149 /* do nondiagonal part */ 1150 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1151 /* send it on its way */ 1152 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1153 /* do local part */ 1154 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1155 /* receive remote parts */ 1156 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 /* 1161 This only works correctly for square matrices where the subblock A->A is the 1162 diagonal block 1163 */ 1164 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1165 { 1166 PetscErrorCode ierr; 1167 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1168 1169 PetscFunctionBegin; 1170 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1171 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1172 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1177 { 1178 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1179 PetscErrorCode ierr; 1180 1181 PetscFunctionBegin; 1182 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1183 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 #if defined(PETSC_USE_LOG) 1194 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1195 #endif 1196 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1197 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1198 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1199 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1200 #if defined(PETSC_USE_CTABLE) 1201 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1202 #else 1203 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1204 #endif 1205 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1206 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1207 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1208 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1209 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1210 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1211 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1212 1213 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1222 #if defined(PETSC_HAVE_ELEMENTAL) 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1224 #endif 1225 #if defined(PETSC_HAVE_HYPRE) 1226 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1235 { 1236 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1237 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1238 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1239 PetscErrorCode ierr; 1240 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1241 int fd; 1242 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1243 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1244 PetscScalar *column_values; 1245 PetscInt message_count,flowcontrolcount; 1246 FILE *file; 1247 1248 PetscFunctionBegin; 1249 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1250 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1251 nz = A->nz + B->nz; 1252 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1253 if (!rank) { 1254 header[0] = MAT_FILE_CLASSID; 1255 header[1] = mat->rmap->N; 1256 header[2] = mat->cmap->N; 1257 1258 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1259 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1260 /* get largest number of rows any processor has */ 1261 rlen = mat->rmap->n; 1262 range = mat->rmap->range; 1263 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1264 } else { 1265 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1266 rlen = mat->rmap->n; 1267 } 1268 1269 /* load up the local row counts */ 1270 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1271 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1272 1273 /* store the row lengths to the file */ 1274 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1275 if (!rank) { 1276 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1277 for (i=1; i<size; i++) { 1278 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1279 rlen = range[i+1] - range[i]; 1280 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1281 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1282 } 1283 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1284 } else { 1285 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1286 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1287 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1288 } 1289 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1290 1291 /* load up the local column indices */ 1292 nzmax = nz; /* th processor needs space a largest processor needs */ 1293 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1294 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1295 cnt = 0; 1296 for (i=0; i<mat->rmap->n; i++) { 1297 for (j=B->i[i]; j<B->i[i+1]; j++) { 1298 if ((col = garray[B->j[j]]) > cstart) break; 1299 column_indices[cnt++] = col; 1300 } 1301 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1302 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1303 } 1304 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1305 1306 /* store the column indices to the file */ 1307 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1308 if (!rank) { 1309 MPI_Status status; 1310 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1311 for (i=1; i<size; i++) { 1312 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1313 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1314 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1315 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1316 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1317 } 1318 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1319 } else { 1320 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1321 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1322 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1323 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1324 } 1325 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1326 1327 /* load up the local column values */ 1328 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1329 cnt = 0; 1330 for (i=0; i<mat->rmap->n; i++) { 1331 for (j=B->i[i]; j<B->i[i+1]; j++) { 1332 if (garray[B->j[j]] > cstart) break; 1333 column_values[cnt++] = B->a[j]; 1334 } 1335 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1336 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1337 } 1338 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1339 1340 /* store the column values to the file */ 1341 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1342 if (!rank) { 1343 MPI_Status status; 1344 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1345 for (i=1; i<size; i++) { 1346 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1347 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1348 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1349 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1350 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1351 } 1352 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1353 } else { 1354 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1355 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1357 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1358 } 1359 ierr = PetscFree(column_values);CHKERRQ(ierr); 1360 1361 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1362 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #include <petscdraw.h> 1367 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1368 { 1369 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1370 PetscErrorCode ierr; 1371 PetscMPIInt rank = aij->rank,size = aij->size; 1372 PetscBool isdraw,iascii,isbinary; 1373 PetscViewer sviewer; 1374 PetscViewerFormat format; 1375 1376 PetscFunctionBegin; 1377 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1378 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1379 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1380 if (iascii) { 1381 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1382 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1383 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1384 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1385 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1386 for (i=0; i<(PetscInt)size; i++) { 1387 nmax = PetscMax(nmax,nz[i]); 1388 nmin = PetscMin(nmin,nz[i]); 1389 navg += nz[i]; 1390 } 1391 ierr = PetscFree(nz);CHKERRQ(ierr); 1392 navg = navg/size; 1393 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1397 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1398 MatInfo info; 1399 PetscBool inodes; 1400 1401 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1402 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1403 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1404 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1405 if (!inodes) { 1406 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1407 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1408 } else { 1409 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1410 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1411 } 1412 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1413 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1414 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1416 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1419 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1422 PetscInt inodecount,inodelimit,*inodes; 1423 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1424 if (inodes) { 1425 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1426 } else { 1427 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1428 } 1429 PetscFunctionReturn(0); 1430 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1431 PetscFunctionReturn(0); 1432 } 1433 } else if (isbinary) { 1434 if (size == 1) { 1435 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1436 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1437 } else { 1438 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1439 } 1440 PetscFunctionReturn(0); 1441 } else if (isdraw) { 1442 PetscDraw draw; 1443 PetscBool isnull; 1444 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1445 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1446 if (isnull) PetscFunctionReturn(0); 1447 } 1448 1449 { 1450 /* assemble the entire matrix onto first processor. */ 1451 Mat A; 1452 Mat_SeqAIJ *Aloc; 1453 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1454 MatScalar *a; 1455 1456 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1457 if (!rank) { 1458 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1459 } else { 1460 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1461 } 1462 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1463 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1464 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1465 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1466 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1467 1468 /* copy over the A part */ 1469 Aloc = (Mat_SeqAIJ*)aij->A->data; 1470 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1471 row = mat->rmap->rstart; 1472 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1473 for (i=0; i<m; i++) { 1474 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1475 row++; 1476 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1477 } 1478 aj = Aloc->j; 1479 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1480 1481 /* copy over the B part */ 1482 Aloc = (Mat_SeqAIJ*)aij->B->data; 1483 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1484 row = mat->rmap->rstart; 1485 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1486 ct = cols; 1487 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1488 for (i=0; i<m; i++) { 1489 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1490 row++; 1491 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1492 } 1493 ierr = PetscFree(ct);CHKERRQ(ierr); 1494 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1495 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1496 /* 1497 Everyone has to call to draw the matrix since the graphics waits are 1498 synchronized across all processors that share the PetscDraw object 1499 */ 1500 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1501 if (!rank) { 1502 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1503 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1504 } 1505 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1506 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1507 ierr = MatDestroy(&A);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1513 { 1514 PetscErrorCode ierr; 1515 PetscBool iascii,isdraw,issocket,isbinary; 1516 1517 PetscFunctionBegin; 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1522 if (iascii || isdraw || isbinary || issocket) { 1523 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1524 } 1525 PetscFunctionReturn(0); 1526 } 1527 1528 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1529 { 1530 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1531 PetscErrorCode ierr; 1532 Vec bb1 = 0; 1533 PetscBool hasop; 1534 1535 PetscFunctionBegin; 1536 if (flag == SOR_APPLY_UPPER) { 1537 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1542 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1543 } 1544 1545 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1546 if (flag & SOR_ZERO_INITIAL_GUESS) { 1547 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1548 its--; 1549 } 1550 1551 while (its--) { 1552 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 1555 /* update rhs: bb1 = bb - B*x */ 1556 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1557 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1558 1559 /* local sweep */ 1560 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1561 } 1562 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1563 if (flag & SOR_ZERO_INITIAL_GUESS) { 1564 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1565 its--; 1566 } 1567 while (its--) { 1568 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 1571 /* update rhs: bb1 = bb - B*x */ 1572 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1573 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1574 1575 /* local sweep */ 1576 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1577 } 1578 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1579 if (flag & SOR_ZERO_INITIAL_GUESS) { 1580 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1581 its--; 1582 } 1583 while (its--) { 1584 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 1587 /* update rhs: bb1 = bb - B*x */ 1588 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1589 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1590 1591 /* local sweep */ 1592 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1593 } 1594 } else if (flag & SOR_EISENSTAT) { 1595 Vec xx1; 1596 1597 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1598 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1599 1600 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 if (!mat->diag) { 1603 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1604 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1605 } 1606 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1607 if (hasop) { 1608 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1609 } else { 1610 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1611 } 1612 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1613 1614 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1615 1616 /* local sweep */ 1617 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1618 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1619 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1620 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1621 1622 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1623 1624 matin->factorerrortype = mat->A->factorerrortype; 1625 PetscFunctionReturn(0); 1626 } 1627 1628 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1629 { 1630 Mat aA,aB,Aperm; 1631 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1632 PetscScalar *aa,*ba; 1633 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1634 PetscSF rowsf,sf; 1635 IS parcolp = NULL; 1636 PetscBool done; 1637 PetscErrorCode ierr; 1638 1639 PetscFunctionBegin; 1640 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1641 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1642 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1643 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1644 1645 /* Invert row permutation to find out where my rows should go */ 1646 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1647 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1648 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1649 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1650 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1652 1653 /* Invert column permutation to find out where my columns should go */ 1654 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1655 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1656 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1657 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1658 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1660 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1661 1662 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1663 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1664 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1665 1666 /* Find out where my gcols should go */ 1667 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1668 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1669 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1670 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1671 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1672 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1674 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1675 1676 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1678 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1679 for (i=0; i<m; i++) { 1680 PetscInt row = rdest[i],rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt cowner,col = cdest[aj[j]]; 1684 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1685 if (rowner == cowner) dnnz[i]++; 1686 else onnz[i]++; 1687 } 1688 for (j=bi[i]; j<bi[i+1]; j++) { 1689 PetscInt cowner,col = gcdest[bj[j]]; 1690 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1691 if (rowner == cowner) dnnz[i]++; 1692 else onnz[i]++; 1693 } 1694 } 1695 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1696 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1699 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1700 1701 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1702 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1703 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1704 for (i=0; i<m; i++) { 1705 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1706 PetscInt j0,rowlen; 1707 rowlen = ai[i+1] - ai[i]; 1708 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1709 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1710 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1711 } 1712 rowlen = bi[i+1] - bi[i]; 1713 for (j0=j=0; j<rowlen; j0=j) { 1714 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1715 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1716 } 1717 } 1718 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1721 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1722 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1723 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1724 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1725 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1726 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1727 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1728 *B = Aperm; 1729 PetscFunctionReturn(0); 1730 } 1731 1732 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1733 { 1734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1739 if (ghosts) *ghosts = aij->garray; 1740 PetscFunctionReturn(0); 1741 } 1742 1743 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1744 { 1745 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1746 Mat A = mat->A,B = mat->B; 1747 PetscErrorCode ierr; 1748 PetscReal isend[5],irecv[5]; 1749 1750 PetscFunctionBegin; 1751 info->block_size = 1.0; 1752 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1753 1754 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1755 isend[3] = info->memory; isend[4] = info->mallocs; 1756 1757 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1758 1759 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1760 isend[3] += info->memory; isend[4] += info->mallocs; 1761 if (flag == MAT_LOCAL) { 1762 info->nz_used = isend[0]; 1763 info->nz_allocated = isend[1]; 1764 info->nz_unneeded = isend[2]; 1765 info->memory = isend[3]; 1766 info->mallocs = isend[4]; 1767 } else if (flag == MAT_GLOBAL_MAX) { 1768 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1769 1770 info->nz_used = irecv[0]; 1771 info->nz_allocated = irecv[1]; 1772 info->nz_unneeded = irecv[2]; 1773 info->memory = irecv[3]; 1774 info->mallocs = irecv[4]; 1775 } else if (flag == MAT_GLOBAL_SUM) { 1776 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1777 1778 info->nz_used = irecv[0]; 1779 info->nz_allocated = irecv[1]; 1780 info->nz_unneeded = irecv[2]; 1781 info->memory = irecv[3]; 1782 info->mallocs = irecv[4]; 1783 } 1784 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1785 info->fill_ratio_needed = 0; 1786 info->factor_mallocs = 0; 1787 PetscFunctionReturn(0); 1788 } 1789 1790 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1791 { 1792 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1793 PetscErrorCode ierr; 1794 1795 PetscFunctionBegin; 1796 switch (op) { 1797 case MAT_NEW_NONZERO_LOCATIONS: 1798 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1799 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1800 case MAT_KEEP_NONZERO_PATTERN: 1801 case MAT_NEW_NONZERO_LOCATION_ERR: 1802 case MAT_USE_INODES: 1803 case MAT_IGNORE_ZERO_ENTRIES: 1804 MatCheckPreallocated(A,1); 1805 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1806 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1807 break; 1808 case MAT_ROW_ORIENTED: 1809 MatCheckPreallocated(A,1); 1810 a->roworiented = flg; 1811 1812 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1813 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1814 break; 1815 case MAT_NEW_DIAGONALS: 1816 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1817 break; 1818 case MAT_IGNORE_OFF_PROC_ENTRIES: 1819 a->donotstash = flg; 1820 break; 1821 case MAT_SPD: 1822 A->spd_set = PETSC_TRUE; 1823 A->spd = flg; 1824 if (flg) { 1825 A->symmetric = PETSC_TRUE; 1826 A->structurally_symmetric = PETSC_TRUE; 1827 A->symmetric_set = PETSC_TRUE; 1828 A->structurally_symmetric_set = PETSC_TRUE; 1829 } 1830 break; 1831 case MAT_SYMMETRIC: 1832 MatCheckPreallocated(A,1); 1833 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1834 break; 1835 case MAT_STRUCTURALLY_SYMMETRIC: 1836 MatCheckPreallocated(A,1); 1837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1838 break; 1839 case MAT_HERMITIAN: 1840 MatCheckPreallocated(A,1); 1841 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1842 break; 1843 case MAT_SYMMETRY_ETERNAL: 1844 MatCheckPreallocated(A,1); 1845 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1846 break; 1847 case MAT_SUBMAT_SINGLEIS: 1848 A->submat_singleis = flg; 1849 break; 1850 case MAT_STRUCTURE_ONLY: 1851 /* The option is handled directly by MatSetOption() */ 1852 break; 1853 default: 1854 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1860 { 1861 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1862 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1863 PetscErrorCode ierr; 1864 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1865 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1866 PetscInt *cmap,*idx_p; 1867 1868 PetscFunctionBegin; 1869 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1870 mat->getrowactive = PETSC_TRUE; 1871 1872 if (!mat->rowvalues && (idx || v)) { 1873 /* 1874 allocate enough space to hold information from the longest row. 1875 */ 1876 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1877 PetscInt max = 1,tmp; 1878 for (i=0; i<matin->rmap->n; i++) { 1879 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1880 if (max < tmp) max = tmp; 1881 } 1882 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1883 } 1884 1885 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1886 lrow = row - rstart; 1887 1888 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1889 if (!v) {pvA = 0; pvB = 0;} 1890 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1891 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1892 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1893 nztot = nzA + nzB; 1894 1895 cmap = mat->garray; 1896 if (v || idx) { 1897 if (nztot) { 1898 /* Sort by increasing column numbers, assuming A and B already sorted */ 1899 PetscInt imark = -1; 1900 if (v) { 1901 *v = v_p = mat->rowvalues; 1902 for (i=0; i<nzB; i++) { 1903 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1904 else break; 1905 } 1906 imark = i; 1907 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1908 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1909 } 1910 if (idx) { 1911 *idx = idx_p = mat->rowindices; 1912 if (imark > -1) { 1913 for (i=0; i<imark; i++) { 1914 idx_p[i] = cmap[cworkB[i]]; 1915 } 1916 } else { 1917 for (i=0; i<nzB; i++) { 1918 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1919 else break; 1920 } 1921 imark = i; 1922 } 1923 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1924 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1925 } 1926 } else { 1927 if (idx) *idx = 0; 1928 if (v) *v = 0; 1929 } 1930 } 1931 *nz = nztot; 1932 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1933 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1938 { 1939 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1940 1941 PetscFunctionBegin; 1942 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1943 aij->getrowactive = PETSC_FALSE; 1944 PetscFunctionReturn(0); 1945 } 1946 1947 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1948 { 1949 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1950 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1951 PetscErrorCode ierr; 1952 PetscInt i,j,cstart = mat->cmap->rstart; 1953 PetscReal sum = 0.0; 1954 MatScalar *v; 1955 1956 PetscFunctionBegin; 1957 if (aij->size == 1) { 1958 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1959 } else { 1960 if (type == NORM_FROBENIUS) { 1961 v = amat->a; 1962 for (i=0; i<amat->nz; i++) { 1963 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1964 } 1965 v = bmat->a; 1966 for (i=0; i<bmat->nz; i++) { 1967 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1968 } 1969 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1970 *norm = PetscSqrtReal(*norm); 1971 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1972 } else if (type == NORM_1) { /* max column norm */ 1973 PetscReal *tmp,*tmp2; 1974 PetscInt *jj,*garray = aij->garray; 1975 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1976 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1977 *norm = 0.0; 1978 v = amat->a; jj = amat->j; 1979 for (j=0; j<amat->nz; j++) { 1980 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1981 } 1982 v = bmat->a; jj = bmat->j; 1983 for (j=0; j<bmat->nz; j++) { 1984 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1985 } 1986 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1987 for (j=0; j<mat->cmap->N; j++) { 1988 if (tmp2[j] > *norm) *norm = tmp2[j]; 1989 } 1990 ierr = PetscFree(tmp);CHKERRQ(ierr); 1991 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else if (type == NORM_INFINITY) { /* max row norm */ 1994 PetscReal ntemp = 0.0; 1995 for (j=0; j<aij->A->rmap->n; j++) { 1996 v = amat->a + amat->i[j]; 1997 sum = 0.0; 1998 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1999 sum += PetscAbsScalar(*v); v++; 2000 } 2001 v = bmat->a + bmat->i[j]; 2002 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2003 sum += PetscAbsScalar(*v); v++; 2004 } 2005 if (sum > ntemp) ntemp = sum; 2006 } 2007 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2008 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2009 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2010 } 2011 PetscFunctionReturn(0); 2012 } 2013 2014 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2015 { 2016 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2017 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2018 PetscErrorCode ierr; 2019 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2020 PetscInt cstart = A->cmap->rstart,ncol; 2021 Mat B; 2022 MatScalar *array; 2023 2024 PetscFunctionBegin; 2025 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2026 ai = Aloc->i; aj = Aloc->j; 2027 bi = Bloc->i; bj = Bloc->j; 2028 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2029 PetscInt *d_nnz,*g_nnz,*o_nnz; 2030 PetscSFNode *oloc; 2031 PETSC_UNUSED PetscSF sf; 2032 2033 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2034 /* compute d_nnz for preallocation */ 2035 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2036 for (i=0; i<ai[ma]; i++) { 2037 d_nnz[aj[i]]++; 2038 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2039 } 2040 /* compute local off-diagonal contributions */ 2041 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2042 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2043 /* map those to global */ 2044 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2045 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2046 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2047 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2048 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2049 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2050 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2051 2052 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2053 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2054 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2055 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2056 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2057 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2058 } else { 2059 B = *matout; 2060 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2061 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2062 } 2063 2064 /* copy over the A part */ 2065 array = Aloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<ma; i++) { 2068 ncol = ai[i+1]-ai[i]; 2069 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2070 row++; 2071 array += ncol; aj += ncol; 2072 } 2073 aj = Aloc->j; 2074 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2075 2076 /* copy over the B part */ 2077 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2078 array = Bloc->a; 2079 row = A->rmap->rstart; 2080 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2081 cols_tmp = cols; 2082 for (i=0; i<mb; i++) { 2083 ncol = bi[i+1]-bi[i]; 2084 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2085 row++; 2086 array += ncol; cols_tmp += ncol; 2087 } 2088 ierr = PetscFree(cols);CHKERRQ(ierr); 2089 2090 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2093 *matout = B; 2094 } else { 2095 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2096 } 2097 PetscFunctionReturn(0); 2098 } 2099 2100 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2101 { 2102 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2103 Mat a = aij->A,b = aij->B; 2104 PetscErrorCode ierr; 2105 PetscInt s1,s2,s3; 2106 2107 PetscFunctionBegin; 2108 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2109 if (rr) { 2110 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2111 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2112 /* Overlap communication with computation. */ 2113 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2114 } 2115 if (ll) { 2116 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2117 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2118 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2119 } 2120 /* scale the diagonal block */ 2121 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2122 2123 if (rr) { 2124 /* Do a scatter end and then right scale the off-diagonal block */ 2125 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2126 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2127 } 2128 PetscFunctionReturn(0); 2129 } 2130 2131 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2132 { 2133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2134 PetscErrorCode ierr; 2135 2136 PetscFunctionBegin; 2137 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2142 { 2143 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2144 Mat a,b,c,d; 2145 PetscBool flg; 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 a = matA->A; b = matA->B; 2150 c = matB->A; d = matB->B; 2151 2152 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2153 if (flg) { 2154 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2155 } 2156 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2161 { 2162 PetscErrorCode ierr; 2163 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2164 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2165 2166 PetscFunctionBegin; 2167 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2168 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2169 /* because of the column compression in the off-processor part of the matrix a->B, 2170 the number of columns in a->B and b->B may be different, hence we cannot call 2171 the MatCopy() directly on the two parts. If need be, we can provide a more 2172 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2173 then copying the submatrices */ 2174 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2175 } else { 2176 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2177 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2178 } 2179 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2180 PetscFunctionReturn(0); 2181 } 2182 2183 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2184 { 2185 PetscErrorCode ierr; 2186 2187 PetscFunctionBegin; 2188 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2189 PetscFunctionReturn(0); 2190 } 2191 2192 /* 2193 Computes the number of nonzeros per row needed for preallocation when X and Y 2194 have different nonzero structure. 2195 */ 2196 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2197 { 2198 PetscInt i,j,k,nzx,nzy; 2199 2200 PetscFunctionBegin; 2201 /* Set the number of nonzeros in the new matrix */ 2202 for (i=0; i<m; i++) { 2203 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2204 nzx = xi[i+1] - xi[i]; 2205 nzy = yi[i+1] - yi[i]; 2206 nnz[i] = 0; 2207 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2208 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2209 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2210 nnz[i]++; 2211 } 2212 for (; k<nzy; k++) nnz[i]++; 2213 } 2214 PetscFunctionReturn(0); 2215 } 2216 2217 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2218 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2219 { 2220 PetscErrorCode ierr; 2221 PetscInt m = Y->rmap->N; 2222 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2223 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2224 2225 PetscFunctionBegin; 2226 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2227 PetscFunctionReturn(0); 2228 } 2229 2230 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2231 { 2232 PetscErrorCode ierr; 2233 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2234 PetscBLASInt bnz,one=1; 2235 Mat_SeqAIJ *x,*y; 2236 2237 PetscFunctionBegin; 2238 if (str == SAME_NONZERO_PATTERN) { 2239 PetscScalar alpha = a; 2240 x = (Mat_SeqAIJ*)xx->A->data; 2241 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2242 y = (Mat_SeqAIJ*)yy->A->data; 2243 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2244 x = (Mat_SeqAIJ*)xx->B->data; 2245 y = (Mat_SeqAIJ*)yy->B->data; 2246 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2247 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2248 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2249 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2250 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2251 } else { 2252 Mat B; 2253 PetscInt *nnz_d,*nnz_o; 2254 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2255 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2256 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2257 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2258 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2259 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2260 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2261 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2262 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2263 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2264 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2265 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2266 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2267 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2268 } 2269 PetscFunctionReturn(0); 2270 } 2271 2272 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2273 2274 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2275 { 2276 #if defined(PETSC_USE_COMPLEX) 2277 PetscErrorCode ierr; 2278 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2279 2280 PetscFunctionBegin; 2281 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2282 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2283 #else 2284 PetscFunctionBegin; 2285 #endif 2286 PetscFunctionReturn(0); 2287 } 2288 2289 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2290 { 2291 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2292 PetscErrorCode ierr; 2293 2294 PetscFunctionBegin; 2295 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2296 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2297 PetscFunctionReturn(0); 2298 } 2299 2300 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2301 { 2302 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2303 PetscErrorCode ierr; 2304 2305 PetscFunctionBegin; 2306 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2307 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2308 PetscFunctionReturn(0); 2309 } 2310 2311 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2312 { 2313 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2314 PetscErrorCode ierr; 2315 PetscInt i,*idxb = 0; 2316 PetscScalar *va,*vb; 2317 Vec vtmp; 2318 2319 PetscFunctionBegin; 2320 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2321 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2322 if (idx) { 2323 for (i=0; i<A->rmap->n; i++) { 2324 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2325 } 2326 } 2327 2328 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2329 if (idx) { 2330 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2331 } 2332 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2333 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2334 2335 for (i=0; i<A->rmap->n; i++) { 2336 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2337 va[i] = vb[i]; 2338 if (idx) idx[i] = a->garray[idxb[i]]; 2339 } 2340 } 2341 2342 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2343 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2344 ierr = PetscFree(idxb);CHKERRQ(ierr); 2345 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2350 { 2351 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2352 PetscErrorCode ierr; 2353 PetscInt i,*idxb = 0; 2354 PetscScalar *va,*vb; 2355 Vec vtmp; 2356 2357 PetscFunctionBegin; 2358 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2359 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2360 if (idx) { 2361 for (i=0; i<A->cmap->n; i++) { 2362 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2363 } 2364 } 2365 2366 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2367 if (idx) { 2368 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2369 } 2370 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2371 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2372 2373 for (i=0; i<A->rmap->n; i++) { 2374 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2375 va[i] = vb[i]; 2376 if (idx) idx[i] = a->garray[idxb[i]]; 2377 } 2378 } 2379 2380 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2381 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2382 ierr = PetscFree(idxb);CHKERRQ(ierr); 2383 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2388 { 2389 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2390 PetscInt n = A->rmap->n; 2391 PetscInt cstart = A->cmap->rstart; 2392 PetscInt *cmap = mat->garray; 2393 PetscInt *diagIdx, *offdiagIdx; 2394 Vec diagV, offdiagV; 2395 PetscScalar *a, *diagA, *offdiagA; 2396 PetscInt r; 2397 PetscErrorCode ierr; 2398 2399 PetscFunctionBegin; 2400 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2401 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2402 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2403 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2404 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2405 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2406 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2407 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2408 for (r = 0; r < n; ++r) { 2409 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2410 a[r] = diagA[r]; 2411 idx[r] = cstart + diagIdx[r]; 2412 } else { 2413 a[r] = offdiagA[r]; 2414 idx[r] = cmap[offdiagIdx[r]]; 2415 } 2416 } 2417 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2418 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2419 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2420 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2421 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2422 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2427 { 2428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2429 PetscInt n = A->rmap->n; 2430 PetscInt cstart = A->cmap->rstart; 2431 PetscInt *cmap = mat->garray; 2432 PetscInt *diagIdx, *offdiagIdx; 2433 Vec diagV, offdiagV; 2434 PetscScalar *a, *diagA, *offdiagA; 2435 PetscInt r; 2436 PetscErrorCode ierr; 2437 2438 PetscFunctionBegin; 2439 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2440 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2441 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2442 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2443 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2444 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2445 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2446 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2447 for (r = 0; r < n; ++r) { 2448 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2449 a[r] = diagA[r]; 2450 idx[r] = cstart + diagIdx[r]; 2451 } else { 2452 a[r] = offdiagA[r]; 2453 idx[r] = cmap[offdiagIdx[r]]; 2454 } 2455 } 2456 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2457 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2458 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2459 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2460 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2461 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2466 { 2467 PetscErrorCode ierr; 2468 Mat *dummy; 2469 2470 PetscFunctionBegin; 2471 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2472 *newmat = *dummy; 2473 ierr = PetscFree(dummy);CHKERRQ(ierr); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2478 { 2479 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2480 PetscErrorCode ierr; 2481 2482 PetscFunctionBegin; 2483 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2484 A->factorerrortype = a->A->factorerrortype; 2485 PetscFunctionReturn(0); 2486 } 2487 2488 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2489 { 2490 PetscErrorCode ierr; 2491 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2492 2493 PetscFunctionBegin; 2494 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2495 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2496 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2497 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2502 { 2503 PetscFunctionBegin; 2504 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2505 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2506 PetscFunctionReturn(0); 2507 } 2508 2509 /*@ 2510 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2511 2512 Collective on Mat 2513 2514 Input Parameters: 2515 + A - the matrix 2516 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2517 2518 Level: advanced 2519 2520 @*/ 2521 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2522 { 2523 PetscErrorCode ierr; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2531 { 2532 PetscErrorCode ierr; 2533 PetscBool sc = PETSC_FALSE,flg; 2534 2535 PetscFunctionBegin; 2536 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2537 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2538 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2539 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2540 if (flg) { 2541 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2542 } 2543 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2548 { 2549 PetscErrorCode ierr; 2550 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2551 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2552 2553 PetscFunctionBegin; 2554 if (!Y->preallocated) { 2555 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2556 } else if (!aij->nz) { 2557 PetscInt nonew = aij->nonew; 2558 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2559 aij->nonew = nonew; 2560 } 2561 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2566 { 2567 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2568 PetscErrorCode ierr; 2569 2570 PetscFunctionBegin; 2571 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2572 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2573 if (d) { 2574 PetscInt rstart; 2575 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2576 *d += rstart; 2577 2578 } 2579 PetscFunctionReturn(0); 2580 } 2581 2582 2583 /* -------------------------------------------------------------------*/ 2584 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2585 MatGetRow_MPIAIJ, 2586 MatRestoreRow_MPIAIJ, 2587 MatMult_MPIAIJ, 2588 /* 4*/ MatMultAdd_MPIAIJ, 2589 MatMultTranspose_MPIAIJ, 2590 MatMultTransposeAdd_MPIAIJ, 2591 0, 2592 0, 2593 0, 2594 /*10*/ 0, 2595 0, 2596 0, 2597 MatSOR_MPIAIJ, 2598 MatTranspose_MPIAIJ, 2599 /*15*/ MatGetInfo_MPIAIJ, 2600 MatEqual_MPIAIJ, 2601 MatGetDiagonal_MPIAIJ, 2602 MatDiagonalScale_MPIAIJ, 2603 MatNorm_MPIAIJ, 2604 /*20*/ MatAssemblyBegin_MPIAIJ, 2605 MatAssemblyEnd_MPIAIJ, 2606 MatSetOption_MPIAIJ, 2607 MatZeroEntries_MPIAIJ, 2608 /*24*/ MatZeroRows_MPIAIJ, 2609 0, 2610 0, 2611 0, 2612 0, 2613 /*29*/ MatSetUp_MPIAIJ, 2614 0, 2615 0, 2616 MatGetDiagonalBlock_MPIAIJ, 2617 0, 2618 /*34*/ MatDuplicate_MPIAIJ, 2619 0, 2620 0, 2621 0, 2622 0, 2623 /*39*/ MatAXPY_MPIAIJ, 2624 MatCreateSubMatrices_MPIAIJ, 2625 MatIncreaseOverlap_MPIAIJ, 2626 MatGetValues_MPIAIJ, 2627 MatCopy_MPIAIJ, 2628 /*44*/ MatGetRowMax_MPIAIJ, 2629 MatScale_MPIAIJ, 2630 MatShift_MPIAIJ, 2631 MatDiagonalSet_MPIAIJ, 2632 MatZeroRowsColumns_MPIAIJ, 2633 /*49*/ MatSetRandom_MPIAIJ, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*54*/ MatFDColoringCreate_MPIXAIJ, 2639 0, 2640 MatSetUnfactored_MPIAIJ, 2641 MatPermute_MPIAIJ, 2642 0, 2643 /*59*/ MatCreateSubMatrix_MPIAIJ, 2644 MatDestroy_MPIAIJ, 2645 MatView_MPIAIJ, 2646 0, 2647 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2648 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2649 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2650 0, 2651 0, 2652 0, 2653 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2654 MatGetRowMinAbs_MPIAIJ, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*75*/ MatFDColoringApply_AIJ, 2660 MatSetFromOptions_MPIAIJ, 2661 0, 2662 0, 2663 MatFindZeroDiagonals_MPIAIJ, 2664 /*80*/ 0, 2665 0, 2666 0, 2667 /*83*/ MatLoad_MPIAIJ, 2668 MatIsSymmetric_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2674 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2675 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2676 MatPtAP_MPIAIJ_MPIAIJ, 2677 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2678 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /*99*/ 0, 2684 0, 2685 0, 2686 MatConjugate_MPIAIJ, 2687 0, 2688 /*104*/MatSetValuesRow_MPIAIJ, 2689 MatRealPart_MPIAIJ, 2690 MatImaginaryPart_MPIAIJ, 2691 0, 2692 0, 2693 /*109*/0, 2694 0, 2695 MatGetRowMin_MPIAIJ, 2696 0, 2697 MatMissingDiagonal_MPIAIJ, 2698 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2699 0, 2700 MatGetGhosts_MPIAIJ, 2701 0, 2702 0, 2703 /*119*/0, 2704 0, 2705 0, 2706 0, 2707 MatGetMultiProcBlock_MPIAIJ, 2708 /*124*/MatFindNonzeroRows_MPIAIJ, 2709 MatGetColumnNorms_MPIAIJ, 2710 MatInvertBlockDiagonal_MPIAIJ, 2711 0, 2712 MatCreateSubMatricesMPI_MPIAIJ, 2713 /*129*/0, 2714 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2715 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2716 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2717 0, 2718 /*134*/0, 2719 0, 2720 MatRARt_MPIAIJ_MPIAIJ, 2721 0, 2722 0, 2723 /*139*/MatSetBlockSizes_MPIAIJ, 2724 0, 2725 0, 2726 MatFDColoringSetUp_MPIXAIJ, 2727 MatFindOffBlockDiagonalEntries_MPIAIJ, 2728 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2729 }; 2730 2731 /* ----------------------------------------------------------------------------------------*/ 2732 2733 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2734 { 2735 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2736 PetscErrorCode ierr; 2737 2738 PetscFunctionBegin; 2739 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2740 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2745 { 2746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2751 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 2755 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2756 { 2757 Mat_MPIAIJ *b; 2758 PetscErrorCode ierr; 2759 2760 PetscFunctionBegin; 2761 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2762 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2763 b = (Mat_MPIAIJ*)B->data; 2764 2765 #if defined(PETSC_USE_CTABLE) 2766 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2767 #else 2768 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2769 #endif 2770 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2771 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2772 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2773 2774 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2775 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2776 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2777 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2778 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2779 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2780 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2781 2782 if (!B->preallocated) { 2783 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2784 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2785 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2786 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2787 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2788 } 2789 2790 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2791 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2792 B->preallocated = PETSC_TRUE; 2793 B->was_assembled = PETSC_FALSE; 2794 B->assembled = PETSC_FALSE;; 2795 PetscFunctionReturn(0); 2796 } 2797 2798 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2799 { 2800 Mat_MPIAIJ *b; 2801 PetscErrorCode ierr; 2802 2803 PetscFunctionBegin; 2804 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2805 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2806 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2807 b = (Mat_MPIAIJ*)B->data; 2808 2809 #if defined(PETSC_USE_CTABLE) 2810 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2811 #else 2812 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2813 #endif 2814 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2815 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2816 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2817 2818 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2819 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2820 B->preallocated = PETSC_TRUE; 2821 B->was_assembled = PETSC_FALSE; 2822 B->assembled = PETSC_FALSE; 2823 PetscFunctionReturn(0); 2824 } 2825 2826 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2827 { 2828 Mat mat; 2829 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2830 PetscErrorCode ierr; 2831 2832 PetscFunctionBegin; 2833 *newmat = 0; 2834 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2835 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2836 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2837 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2838 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2839 a = (Mat_MPIAIJ*)mat->data; 2840 2841 mat->factortype = matin->factortype; 2842 mat->assembled = PETSC_TRUE; 2843 mat->insertmode = NOT_SET_VALUES; 2844 mat->preallocated = PETSC_TRUE; 2845 2846 a->size = oldmat->size; 2847 a->rank = oldmat->rank; 2848 a->donotstash = oldmat->donotstash; 2849 a->roworiented = oldmat->roworiented; 2850 a->rowindices = 0; 2851 a->rowvalues = 0; 2852 a->getrowactive = PETSC_FALSE; 2853 2854 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2855 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2856 2857 if (oldmat->colmap) { 2858 #if defined(PETSC_USE_CTABLE) 2859 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2860 #else 2861 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2862 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2863 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2864 #endif 2865 } else a->colmap = 0; 2866 if (oldmat->garray) { 2867 PetscInt len; 2868 len = oldmat->B->cmap->n; 2869 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2870 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2871 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2872 } else a->garray = 0; 2873 2874 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2875 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2876 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2877 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2878 2879 if (oldmat->Mvctx_mpi1) { 2880 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2881 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2882 } 2883 2884 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2885 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2886 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2887 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2888 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2889 *newmat = mat; 2890 PetscFunctionReturn(0); 2891 } 2892 2893 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2894 { 2895 PetscScalar *vals,*svals; 2896 MPI_Comm comm; 2897 PetscErrorCode ierr; 2898 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2899 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2900 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2901 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2902 PetscInt cend,cstart,n,*rowners; 2903 int fd; 2904 PetscInt bs = newMat->rmap->bs; 2905 2906 PetscFunctionBegin; 2907 /* force binary viewer to load .info file if it has not yet done so */ 2908 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2909 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2910 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2911 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2912 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2913 if (!rank) { 2914 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2915 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2916 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2917 } 2918 2919 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2920 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2921 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2922 if (bs < 0) bs = 1; 2923 2924 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2925 M = header[1]; N = header[2]; 2926 2927 /* If global sizes are set, check if they are consistent with that given in the file */ 2928 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2929 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2930 2931 /* determine ownership of all (block) rows */ 2932 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2933 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2934 else m = newMat->rmap->n; /* Set by user */ 2935 2936 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2937 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2938 2939 /* First process needs enough room for process with most rows */ 2940 if (!rank) { 2941 mmax = rowners[1]; 2942 for (i=2; i<=size; i++) { 2943 mmax = PetscMax(mmax, rowners[i]); 2944 } 2945 } else mmax = -1; /* unused, but compilers complain */ 2946 2947 rowners[0] = 0; 2948 for (i=2; i<=size; i++) { 2949 rowners[i] += rowners[i-1]; 2950 } 2951 rstart = rowners[rank]; 2952 rend = rowners[rank+1]; 2953 2954 /* distribute row lengths to all processors */ 2955 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2956 if (!rank) { 2957 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2958 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2959 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2960 for (j=0; j<m; j++) { 2961 procsnz[0] += ourlens[j]; 2962 } 2963 for (i=1; i<size; i++) { 2964 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2965 /* calculate the number of nonzeros on each processor */ 2966 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2967 procsnz[i] += rowlengths[j]; 2968 } 2969 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2970 } 2971 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2972 } else { 2973 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2974 } 2975 2976 if (!rank) { 2977 /* determine max buffer needed and allocate it */ 2978 maxnz = 0; 2979 for (i=0; i<size; i++) { 2980 maxnz = PetscMax(maxnz,procsnz[i]); 2981 } 2982 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2983 2984 /* read in my part of the matrix column indices */ 2985 nz = procsnz[0]; 2986 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2987 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2988 2989 /* read in every one elses and ship off */ 2990 for (i=1; i<size; i++) { 2991 nz = procsnz[i]; 2992 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2993 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2994 } 2995 ierr = PetscFree(cols);CHKERRQ(ierr); 2996 } else { 2997 /* determine buffer space needed for message */ 2998 nz = 0; 2999 for (i=0; i<m; i++) { 3000 nz += ourlens[i]; 3001 } 3002 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3003 3004 /* receive message of column indices*/ 3005 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3006 } 3007 3008 /* determine column ownership if matrix is not square */ 3009 if (N != M) { 3010 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3011 else n = newMat->cmap->n; 3012 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3013 cstart = cend - n; 3014 } else { 3015 cstart = rstart; 3016 cend = rend; 3017 n = cend - cstart; 3018 } 3019 3020 /* loop over local rows, determining number of off diagonal entries */ 3021 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3022 jj = 0; 3023 for (i=0; i<m; i++) { 3024 for (j=0; j<ourlens[i]; j++) { 3025 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3026 jj++; 3027 } 3028 } 3029 3030 for (i=0; i<m; i++) { 3031 ourlens[i] -= offlens[i]; 3032 } 3033 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3034 3035 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3036 3037 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3038 3039 for (i=0; i<m; i++) { 3040 ourlens[i] += offlens[i]; 3041 } 3042 3043 if (!rank) { 3044 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3045 3046 /* read in my part of the matrix numerical values */ 3047 nz = procsnz[0]; 3048 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3049 3050 /* insert into matrix */ 3051 jj = rstart; 3052 smycols = mycols; 3053 svals = vals; 3054 for (i=0; i<m; i++) { 3055 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3056 smycols += ourlens[i]; 3057 svals += ourlens[i]; 3058 jj++; 3059 } 3060 3061 /* read in other processors and ship out */ 3062 for (i=1; i<size; i++) { 3063 nz = procsnz[i]; 3064 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3065 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3066 } 3067 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3068 } else { 3069 /* receive numeric values */ 3070 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3071 3072 /* receive message of values*/ 3073 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3074 3075 /* insert into matrix */ 3076 jj = rstart; 3077 smycols = mycols; 3078 svals = vals; 3079 for (i=0; i<m; i++) { 3080 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3081 smycols += ourlens[i]; 3082 svals += ourlens[i]; 3083 jj++; 3084 } 3085 } 3086 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3087 ierr = PetscFree(vals);CHKERRQ(ierr); 3088 ierr = PetscFree(mycols);CHKERRQ(ierr); 3089 ierr = PetscFree(rowners);CHKERRQ(ierr); 3090 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3091 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3092 PetscFunctionReturn(0); 3093 } 3094 3095 /* Not scalable because of ISAllGather() unless getting all columns. */ 3096 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3097 { 3098 PetscErrorCode ierr; 3099 IS iscol_local; 3100 PetscBool isstride; 3101 PetscMPIInt lisstride=0,gisstride; 3102 3103 PetscFunctionBegin; 3104 /* check if we are grabbing all columns*/ 3105 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3106 3107 if (isstride) { 3108 PetscInt start,len,mstart,mlen; 3109 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3110 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3111 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3112 if (mstart == start && mlen-mstart == len) lisstride = 1; 3113 } 3114 3115 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3116 if (gisstride) { 3117 PetscInt N; 3118 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3119 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3120 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3121 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3122 } else { 3123 PetscInt cbs; 3124 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3125 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3126 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3127 } 3128 3129 *isseq = iscol_local; 3130 PetscFunctionReturn(0); 3131 } 3132 3133 /* 3134 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3135 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3136 3137 Input Parameters: 3138 mat - matrix 3139 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3140 i.e., mat->rstart <= isrow[i] < mat->rend 3141 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3142 i.e., mat->cstart <= iscol[i] < mat->cend 3143 Output Parameter: 3144 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3145 iscol_o - sequential column index set for retrieving mat->B 3146 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3147 */ 3148 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3149 { 3150 PetscErrorCode ierr; 3151 Vec x,cmap; 3152 const PetscInt *is_idx; 3153 PetscScalar *xarray,*cmaparray; 3154 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3155 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3156 Mat B=a->B; 3157 Vec lvec=a->lvec,lcmap; 3158 PetscInt i,cstart,cend,Bn=B->cmap->N; 3159 MPI_Comm comm; 3160 VecScatter Mvctx=a->Mvctx; 3161 3162 PetscFunctionBegin; 3163 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3164 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3165 3166 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3167 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3168 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3169 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3170 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3171 3172 /* Get start indices */ 3173 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3174 isstart -= ncols; 3175 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3176 3177 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3178 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3179 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3180 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3181 for (i=0; i<ncols; i++) { 3182 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3183 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3184 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3185 } 3186 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3187 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3188 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3189 3190 /* Get iscol_d */ 3191 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3192 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3193 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3194 3195 /* Get isrow_d */ 3196 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3197 rstart = mat->rmap->rstart; 3198 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3199 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3200 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3201 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3202 3203 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3204 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3205 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3206 3207 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3208 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3209 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3210 3211 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3212 3213 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3214 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3215 3216 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3217 /* off-process column indices */ 3218 count = 0; 3219 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3220 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3221 3222 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3223 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3224 for (i=0; i<Bn; i++) { 3225 if (PetscRealPart(xarray[i]) > -1.0) { 3226 idx[count] = i; /* local column index in off-diagonal part B */ 3227 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3228 count++; 3229 } 3230 } 3231 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3232 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3233 3234 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3235 /* cannot ensure iscol_o has same blocksize as iscol! */ 3236 3237 ierr = PetscFree(idx);CHKERRQ(ierr); 3238 *garray = cmap1; 3239 3240 ierr = VecDestroy(&x);CHKERRQ(ierr); 3241 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3242 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3243 PetscFunctionReturn(0); 3244 } 3245 3246 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3247 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3248 { 3249 PetscErrorCode ierr; 3250 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3251 Mat M = NULL; 3252 MPI_Comm comm; 3253 IS iscol_d,isrow_d,iscol_o; 3254 Mat Asub = NULL,Bsub = NULL; 3255 PetscInt n; 3256 3257 PetscFunctionBegin; 3258 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3259 3260 if (call == MAT_REUSE_MATRIX) { 3261 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3262 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3263 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3264 3265 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3266 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3267 3268 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3269 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3270 3271 /* Update diagonal and off-diagonal portions of submat */ 3272 asub = (Mat_MPIAIJ*)(*submat)->data; 3273 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3274 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3275 if (n) { 3276 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3277 } 3278 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3279 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3280 3281 } else { /* call == MAT_INITIAL_MATRIX) */ 3282 const PetscInt *garray; 3283 PetscInt BsubN; 3284 3285 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3286 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3287 3288 /* Create local submatrices Asub and Bsub */ 3289 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3290 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3291 3292 /* Create submatrix M */ 3293 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3294 3295 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3296 asub = (Mat_MPIAIJ*)M->data; 3297 3298 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3299 n = asub->B->cmap->N; 3300 if (BsubN > n) { 3301 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3302 const PetscInt *idx; 3303 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3304 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3305 3306 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3307 j = 0; 3308 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3309 for (i=0; i<n; i++) { 3310 if (j >= BsubN) break; 3311 while (subgarray[i] > garray[j]) j++; 3312 3313 if (subgarray[i] == garray[j]) { 3314 idx_new[i] = idx[j++]; 3315 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3316 } 3317 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3318 3319 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3320 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3321 3322 } else if (BsubN < n) { 3323 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3324 } 3325 3326 ierr = PetscFree(garray);CHKERRQ(ierr); 3327 *submat = M; 3328 3329 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3330 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3331 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3332 3333 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3334 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3335 3336 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3337 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3338 } 3339 PetscFunctionReturn(0); 3340 } 3341 3342 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3343 { 3344 PetscErrorCode ierr; 3345 IS iscol_local=NULL,isrow_d; 3346 PetscInt csize; 3347 PetscInt n,i,j,start,end; 3348 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3349 MPI_Comm comm; 3350 3351 PetscFunctionBegin; 3352 /* If isrow has same processor distribution as mat, 3353 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3354 if (call == MAT_REUSE_MATRIX) { 3355 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3356 if (isrow_d) { 3357 sameRowDist = PETSC_TRUE; 3358 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3359 } else { 3360 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3361 if (iscol_local) { 3362 sameRowDist = PETSC_TRUE; 3363 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3364 } 3365 } 3366 } else { 3367 /* Check if isrow has same processor distribution as mat */ 3368 sameDist[0] = PETSC_FALSE; 3369 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3370 if (!n) { 3371 sameDist[0] = PETSC_TRUE; 3372 } else { 3373 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3374 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3375 if (i >= start && j < end) { 3376 sameDist[0] = PETSC_TRUE; 3377 } 3378 } 3379 3380 /* Check if iscol has same processor distribution as mat */ 3381 sameDist[1] = PETSC_FALSE; 3382 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3383 if (!n) { 3384 sameDist[1] = PETSC_TRUE; 3385 } else { 3386 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3387 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3388 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3389 } 3390 3391 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3392 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3393 sameRowDist = tsameDist[0]; 3394 } 3395 3396 if (sameRowDist) { 3397 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3398 /* isrow and iscol have same processor distribution as mat */ 3399 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3400 PetscFunctionReturn(0); 3401 } else { /* sameRowDist */ 3402 /* isrow has same processor distribution as mat */ 3403 if (call == MAT_INITIAL_MATRIX) { 3404 PetscBool sorted; 3405 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3406 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3407 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3408 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3409 3410 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3411 if (sorted) { 3412 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3413 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3414 PetscFunctionReturn(0); 3415 } 3416 } else { /* call == MAT_REUSE_MATRIX */ 3417 IS iscol_sub; 3418 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3419 if (iscol_sub) { 3420 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3421 PetscFunctionReturn(0); 3422 } 3423 } 3424 } 3425 } 3426 3427 /* General case: iscol -> iscol_local which has global size of iscol */ 3428 if (call == MAT_REUSE_MATRIX) { 3429 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3430 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3431 } else { 3432 if (!iscol_local) { 3433 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3434 } 3435 } 3436 3437 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3438 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3439 3440 if (call == MAT_INITIAL_MATRIX) { 3441 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3442 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3443 } 3444 PetscFunctionReturn(0); 3445 } 3446 3447 /*@C 3448 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3449 and "off-diagonal" part of the matrix in CSR format. 3450 3451 Collective on MPI_Comm 3452 3453 Input Parameters: 3454 + comm - MPI communicator 3455 . A - "diagonal" portion of matrix 3456 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3457 - garray - global index of B columns 3458 3459 Output Parameter: 3460 . mat - the matrix, with input A as its local diagonal matrix 3461 Level: advanced 3462 3463 Notes: 3464 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3465 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3466 3467 .seealso: MatCreateMPIAIJWithSplitArrays() 3468 @*/ 3469 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3470 { 3471 PetscErrorCode ierr; 3472 Mat_MPIAIJ *maij; 3473 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3474 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3475 PetscScalar *oa=b->a; 3476 Mat Bnew; 3477 PetscInt m,n,N; 3478 3479 PetscFunctionBegin; 3480 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3481 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3482 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3483 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3484 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3485 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3486 3487 /* Get global columns of mat */ 3488 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3489 3490 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3491 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3492 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3493 maij = (Mat_MPIAIJ*)(*mat)->data; 3494 3495 (*mat)->preallocated = PETSC_TRUE; 3496 3497 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3498 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3499 3500 /* Set A as diagonal portion of *mat */ 3501 maij->A = A; 3502 3503 nz = oi[m]; 3504 for (i=0; i<nz; i++) { 3505 col = oj[i]; 3506 oj[i] = garray[col]; 3507 } 3508 3509 /* Set Bnew as off-diagonal portion of *mat */ 3510 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3511 bnew = (Mat_SeqAIJ*)Bnew->data; 3512 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3513 maij->B = Bnew; 3514 3515 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3516 3517 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3518 b->free_a = PETSC_FALSE; 3519 b->free_ij = PETSC_FALSE; 3520 ierr = MatDestroy(&B);CHKERRQ(ierr); 3521 3522 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3523 bnew->free_a = PETSC_TRUE; 3524 bnew->free_ij = PETSC_TRUE; 3525 3526 /* condense columns of maij->B */ 3527 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3528 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3529 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3530 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3531 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3536 3537 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3538 { 3539 PetscErrorCode ierr; 3540 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3541 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3542 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3543 Mat M,Msub,B=a->B; 3544 MatScalar *aa; 3545 Mat_SeqAIJ *aij; 3546 PetscInt *garray = a->garray,*colsub,Ncols; 3547 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3548 IS iscol_sub,iscmap; 3549 const PetscInt *is_idx,*cmap; 3550 PetscBool allcolumns=PETSC_FALSE; 3551 MPI_Comm comm; 3552 3553 PetscFunctionBegin; 3554 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3555 3556 if (call == MAT_REUSE_MATRIX) { 3557 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3558 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3559 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3560 3561 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3562 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3563 3564 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3565 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3566 3567 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3568 3569 } else { /* call == MAT_INITIAL_MATRIX) */ 3570 PetscBool flg; 3571 3572 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3573 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3574 3575 /* (1) iscol -> nonscalable iscol_local */ 3576 /* Check for special case: each processor gets entire matrix columns */ 3577 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3578 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3579 if (allcolumns) { 3580 iscol_sub = iscol_local; 3581 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3582 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3583 3584 } else { 3585 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3586 PetscInt *idx,*cmap1,k; 3587 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3588 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3589 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3590 count = 0; 3591 k = 0; 3592 for (i=0; i<Ncols; i++) { 3593 j = is_idx[i]; 3594 if (j >= cstart && j < cend) { 3595 /* diagonal part of mat */ 3596 idx[count] = j; 3597 cmap1[count++] = i; /* column index in submat */ 3598 } else if (Bn) { 3599 /* off-diagonal part of mat */ 3600 if (j == garray[k]) { 3601 idx[count] = j; 3602 cmap1[count++] = i; /* column index in submat */ 3603 } else if (j > garray[k]) { 3604 while (j > garray[k] && k < Bn-1) k++; 3605 if (j == garray[k]) { 3606 idx[count] = j; 3607 cmap1[count++] = i; /* column index in submat */ 3608 } 3609 } 3610 } 3611 } 3612 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3613 3614 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3615 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3616 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3617 3618 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3619 } 3620 3621 /* (3) Create sequential Msub */ 3622 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3623 } 3624 3625 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3626 aij = (Mat_SeqAIJ*)(Msub)->data; 3627 ii = aij->i; 3628 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3629 3630 /* 3631 m - number of local rows 3632 Ncols - number of columns (same on all processors) 3633 rstart - first row in new global matrix generated 3634 */ 3635 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3636 3637 if (call == MAT_INITIAL_MATRIX) { 3638 /* (4) Create parallel newmat */ 3639 PetscMPIInt rank,size; 3640 PetscInt csize; 3641 3642 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3643 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3644 3645 /* 3646 Determine the number of non-zeros in the diagonal and off-diagonal 3647 portions of the matrix in order to do correct preallocation 3648 */ 3649 3650 /* first get start and end of "diagonal" columns */ 3651 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3652 if (csize == PETSC_DECIDE) { 3653 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3654 if (mglobal == Ncols) { /* square matrix */ 3655 nlocal = m; 3656 } else { 3657 nlocal = Ncols/size + ((Ncols % size) > rank); 3658 } 3659 } else { 3660 nlocal = csize; 3661 } 3662 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3663 rstart = rend - nlocal; 3664 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3665 3666 /* next, compute all the lengths */ 3667 jj = aij->j; 3668 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3669 olens = dlens + m; 3670 for (i=0; i<m; i++) { 3671 jend = ii[i+1] - ii[i]; 3672 olen = 0; 3673 dlen = 0; 3674 for (j=0; j<jend; j++) { 3675 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3676 else dlen++; 3677 jj++; 3678 } 3679 olens[i] = olen; 3680 dlens[i] = dlen; 3681 } 3682 3683 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3684 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3685 3686 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3687 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3688 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3689 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3690 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3691 ierr = PetscFree(dlens);CHKERRQ(ierr); 3692 3693 } else { /* call == MAT_REUSE_MATRIX */ 3694 M = *newmat; 3695 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3696 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3697 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3698 /* 3699 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3700 rather than the slower MatSetValues(). 3701 */ 3702 M->was_assembled = PETSC_TRUE; 3703 M->assembled = PETSC_FALSE; 3704 } 3705 3706 /* (5) Set values of Msub to *newmat */ 3707 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3708 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3709 3710 jj = aij->j; 3711 aa = aij->a; 3712 for (i=0; i<m; i++) { 3713 row = rstart + i; 3714 nz = ii[i+1] - ii[i]; 3715 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3716 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3717 jj += nz; aa += nz; 3718 } 3719 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3720 3721 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3722 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3723 3724 ierr = PetscFree(colsub);CHKERRQ(ierr); 3725 3726 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3727 if (call == MAT_INITIAL_MATRIX) { 3728 *newmat = M; 3729 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3730 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3731 3732 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3733 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3734 3735 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3736 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3737 3738 if (iscol_local) { 3739 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3740 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3741 } 3742 } 3743 PetscFunctionReturn(0); 3744 } 3745 3746 /* 3747 Not great since it makes two copies of the submatrix, first an SeqAIJ 3748 in local and then by concatenating the local matrices the end result. 3749 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3750 3751 Note: This requires a sequential iscol with all indices. 3752 */ 3753 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3754 { 3755 PetscErrorCode ierr; 3756 PetscMPIInt rank,size; 3757 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3758 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3759 Mat M,Mreuse; 3760 MatScalar *aa,*vwork; 3761 MPI_Comm comm; 3762 Mat_SeqAIJ *aij; 3763 PetscBool colflag,allcolumns=PETSC_FALSE; 3764 3765 PetscFunctionBegin; 3766 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3767 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3768 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3769 3770 /* Check for special case: each processor gets entire matrix columns */ 3771 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3772 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3773 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3774 3775 if (call == MAT_REUSE_MATRIX) { 3776 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3777 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3778 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3779 } else { 3780 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3781 } 3782 3783 /* 3784 m - number of local rows 3785 n - number of columns (same on all processors) 3786 rstart - first row in new global matrix generated 3787 */ 3788 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3789 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3790 if (call == MAT_INITIAL_MATRIX) { 3791 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3792 ii = aij->i; 3793 jj = aij->j; 3794 3795 /* 3796 Determine the number of non-zeros in the diagonal and off-diagonal 3797 portions of the matrix in order to do correct preallocation 3798 */ 3799 3800 /* first get start and end of "diagonal" columns */ 3801 if (csize == PETSC_DECIDE) { 3802 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3803 if (mglobal == n) { /* square matrix */ 3804 nlocal = m; 3805 } else { 3806 nlocal = n/size + ((n % size) > rank); 3807 } 3808 } else { 3809 nlocal = csize; 3810 } 3811 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3812 rstart = rend - nlocal; 3813 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3814 3815 /* next, compute all the lengths */ 3816 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3817 olens = dlens + m; 3818 for (i=0; i<m; i++) { 3819 jend = ii[i+1] - ii[i]; 3820 olen = 0; 3821 dlen = 0; 3822 for (j=0; j<jend; j++) { 3823 if (*jj < rstart || *jj >= rend) olen++; 3824 else dlen++; 3825 jj++; 3826 } 3827 olens[i] = olen; 3828 dlens[i] = dlen; 3829 } 3830 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3831 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3832 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3833 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3834 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3835 ierr = PetscFree(dlens);CHKERRQ(ierr); 3836 } else { 3837 PetscInt ml,nl; 3838 3839 M = *newmat; 3840 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3841 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3842 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3843 /* 3844 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3845 rather than the slower MatSetValues(). 3846 */ 3847 M->was_assembled = PETSC_TRUE; 3848 M->assembled = PETSC_FALSE; 3849 } 3850 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3851 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3852 ii = aij->i; 3853 jj = aij->j; 3854 aa = aij->a; 3855 for (i=0; i<m; i++) { 3856 row = rstart + i; 3857 nz = ii[i+1] - ii[i]; 3858 cwork = jj; jj += nz; 3859 vwork = aa; aa += nz; 3860 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3861 } 3862 3863 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 *newmat = M; 3866 3867 /* save submatrix used in processor for next request */ 3868 if (call == MAT_INITIAL_MATRIX) { 3869 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3870 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3871 } 3872 PetscFunctionReturn(0); 3873 } 3874 3875 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3876 { 3877 PetscInt m,cstart, cend,j,nnz,i,d; 3878 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3879 const PetscInt *JJ; 3880 PetscScalar *values; 3881 PetscErrorCode ierr; 3882 PetscBool nooffprocentries; 3883 3884 PetscFunctionBegin; 3885 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3886 3887 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3888 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3889 m = B->rmap->n; 3890 cstart = B->cmap->rstart; 3891 cend = B->cmap->rend; 3892 rstart = B->rmap->rstart; 3893 3894 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3895 3896 #if defined(PETSC_USE_DEBUG) 3897 for (i=0; i<m; i++) { 3898 nnz = Ii[i+1]- Ii[i]; 3899 JJ = J + Ii[i]; 3900 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3901 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3902 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3903 } 3904 #endif 3905 3906 for (i=0; i<m; i++) { 3907 nnz = Ii[i+1]- Ii[i]; 3908 JJ = J + Ii[i]; 3909 nnz_max = PetscMax(nnz_max,nnz); 3910 d = 0; 3911 for (j=0; j<nnz; j++) { 3912 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3913 } 3914 d_nnz[i] = d; 3915 o_nnz[i] = nnz - d; 3916 } 3917 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3918 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3919 3920 if (v) values = (PetscScalar*)v; 3921 else { 3922 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3923 } 3924 3925 for (i=0; i<m; i++) { 3926 ii = i + rstart; 3927 nnz = Ii[i+1]- Ii[i]; 3928 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3929 } 3930 nooffprocentries = B->nooffprocentries; 3931 B->nooffprocentries = PETSC_TRUE; 3932 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3934 B->nooffprocentries = nooffprocentries; 3935 3936 if (!v) { 3937 ierr = PetscFree(values);CHKERRQ(ierr); 3938 } 3939 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3940 PetscFunctionReturn(0); 3941 } 3942 3943 /*@ 3944 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3945 (the default parallel PETSc format). 3946 3947 Collective on MPI_Comm 3948 3949 Input Parameters: 3950 + B - the matrix 3951 . i - the indices into j for the start of each local row (starts with zero) 3952 . j - the column indices for each local row (starts with zero) 3953 - v - optional values in the matrix 3954 3955 Level: developer 3956 3957 Notes: 3958 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3959 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3960 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3961 3962 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3963 3964 The format which is used for the sparse matrix input, is equivalent to a 3965 row-major ordering.. i.e for the following matrix, the input data expected is 3966 as shown 3967 3968 $ 1 0 0 3969 $ 2 0 3 P0 3970 $ ------- 3971 $ 4 5 6 P1 3972 $ 3973 $ Process0 [P0]: rows_owned=[0,1] 3974 $ i = {0,1,3} [size = nrow+1 = 2+1] 3975 $ j = {0,0,2} [size = 3] 3976 $ v = {1,2,3} [size = 3] 3977 $ 3978 $ Process1 [P1]: rows_owned=[2] 3979 $ i = {0,3} [size = nrow+1 = 1+1] 3980 $ j = {0,1,2} [size = 3] 3981 $ v = {4,5,6} [size = 3] 3982 3983 .keywords: matrix, aij, compressed row, sparse, parallel 3984 3985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3986 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3987 @*/ 3988 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3989 { 3990 PetscErrorCode ierr; 3991 3992 PetscFunctionBegin; 3993 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3994 PetscFunctionReturn(0); 3995 } 3996 3997 /*@C 3998 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3999 (the default parallel PETSc format). For good matrix assembly performance 4000 the user should preallocate the matrix storage by setting the parameters 4001 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4002 performance can be increased by more than a factor of 50. 4003 4004 Collective on MPI_Comm 4005 4006 Input Parameters: 4007 + B - the matrix 4008 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4009 (same value is used for all local rows) 4010 . d_nnz - array containing the number of nonzeros in the various rows of the 4011 DIAGONAL portion of the local submatrix (possibly different for each row) 4012 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4013 The size of this array is equal to the number of local rows, i.e 'm'. 4014 For matrices that will be factored, you must leave room for (and set) 4015 the diagonal entry even if it is zero. 4016 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4017 submatrix (same value is used for all local rows). 4018 - o_nnz - array containing the number of nonzeros in the various rows of the 4019 OFF-DIAGONAL portion of the local submatrix (possibly different for 4020 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4021 structure. The size of this array is equal to the number 4022 of local rows, i.e 'm'. 4023 4024 If the *_nnz parameter is given then the *_nz parameter is ignored 4025 4026 The AIJ format (also called the Yale sparse matrix format or 4027 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4028 storage. The stored row and column indices begin with zero. 4029 See Users-Manual: ch_mat for details. 4030 4031 The parallel matrix is partitioned such that the first m0 rows belong to 4032 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4033 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4034 4035 The DIAGONAL portion of the local submatrix of a processor can be defined 4036 as the submatrix which is obtained by extraction the part corresponding to 4037 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4038 first row that belongs to the processor, r2 is the last row belonging to 4039 the this processor, and c1-c2 is range of indices of the local part of a 4040 vector suitable for applying the matrix to. This is an mxn matrix. In the 4041 common case of a square matrix, the row and column ranges are the same and 4042 the DIAGONAL part is also square. The remaining portion of the local 4043 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4044 4045 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4046 4047 You can call MatGetInfo() to get information on how effective the preallocation was; 4048 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4049 You can also run with the option -info and look for messages with the string 4050 malloc in them to see if additional memory allocation was needed. 4051 4052 Example usage: 4053 4054 Consider the following 8x8 matrix with 34 non-zero values, that is 4055 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4056 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4057 as follows: 4058 4059 .vb 4060 1 2 0 | 0 3 0 | 0 4 4061 Proc0 0 5 6 | 7 0 0 | 8 0 4062 9 0 10 | 11 0 0 | 12 0 4063 ------------------------------------- 4064 13 0 14 | 15 16 17 | 0 0 4065 Proc1 0 18 0 | 19 20 21 | 0 0 4066 0 0 0 | 22 23 0 | 24 0 4067 ------------------------------------- 4068 Proc2 25 26 27 | 0 0 28 | 29 0 4069 30 0 0 | 31 32 33 | 0 34 4070 .ve 4071 4072 This can be represented as a collection of submatrices as: 4073 4074 .vb 4075 A B C 4076 D E F 4077 G H I 4078 .ve 4079 4080 Where the submatrices A,B,C are owned by proc0, D,E,F are 4081 owned by proc1, G,H,I are owned by proc2. 4082 4083 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4084 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4085 The 'M','N' parameters are 8,8, and have the same values on all procs. 4086 4087 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4088 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4089 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4090 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4091 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4092 matrix, ans [DF] as another SeqAIJ matrix. 4093 4094 When d_nz, o_nz parameters are specified, d_nz storage elements are 4095 allocated for every row of the local diagonal submatrix, and o_nz 4096 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4097 One way to choose d_nz and o_nz is to use the max nonzerors per local 4098 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4099 In this case, the values of d_nz,o_nz are: 4100 .vb 4101 proc0 : dnz = 2, o_nz = 2 4102 proc1 : dnz = 3, o_nz = 2 4103 proc2 : dnz = 1, o_nz = 4 4104 .ve 4105 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4106 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4107 for proc3. i.e we are using 12+15+10=37 storage locations to store 4108 34 values. 4109 4110 When d_nnz, o_nnz parameters are specified, the storage is specified 4111 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4112 In the above case the values for d_nnz,o_nnz are: 4113 .vb 4114 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4115 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4116 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4117 .ve 4118 Here the space allocated is sum of all the above values i.e 34, and 4119 hence pre-allocation is perfect. 4120 4121 Level: intermediate 4122 4123 .keywords: matrix, aij, compressed row, sparse, parallel 4124 4125 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4126 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4127 @*/ 4128 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4129 { 4130 PetscErrorCode ierr; 4131 4132 PetscFunctionBegin; 4133 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4134 PetscValidType(B,1); 4135 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4136 PetscFunctionReturn(0); 4137 } 4138 4139 /*@ 4140 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4141 CSR format the local rows. 4142 4143 Collective on MPI_Comm 4144 4145 Input Parameters: 4146 + comm - MPI communicator 4147 . m - number of local rows (Cannot be PETSC_DECIDE) 4148 . n - This value should be the same as the local size used in creating the 4149 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4150 calculated if N is given) For square matrices n is almost always m. 4151 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4152 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4153 . i - row indices 4154 . j - column indices 4155 - a - matrix values 4156 4157 Output Parameter: 4158 . mat - the matrix 4159 4160 Level: intermediate 4161 4162 Notes: 4163 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4164 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4165 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4166 4167 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4168 4169 The format which is used for the sparse matrix input, is equivalent to a 4170 row-major ordering.. i.e for the following matrix, the input data expected is 4171 as shown 4172 4173 $ 1 0 0 4174 $ 2 0 3 P0 4175 $ ------- 4176 $ 4 5 6 P1 4177 $ 4178 $ Process0 [P0]: rows_owned=[0,1] 4179 $ i = {0,1,3} [size = nrow+1 = 2+1] 4180 $ j = {0,0,2} [size = 3] 4181 $ v = {1,2,3} [size = 3] 4182 $ 4183 $ Process1 [P1]: rows_owned=[2] 4184 $ i = {0,3} [size = nrow+1 = 1+1] 4185 $ j = {0,1,2} [size = 3] 4186 $ v = {4,5,6} [size = 3] 4187 4188 .keywords: matrix, aij, compressed row, sparse, parallel 4189 4190 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4191 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4192 @*/ 4193 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4194 { 4195 PetscErrorCode ierr; 4196 4197 PetscFunctionBegin; 4198 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4199 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4200 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4201 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4202 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4203 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4204 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4205 PetscFunctionReturn(0); 4206 } 4207 4208 /*@C 4209 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4210 (the default parallel PETSc format). For good matrix assembly performance 4211 the user should preallocate the matrix storage by setting the parameters 4212 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4213 performance can be increased by more than a factor of 50. 4214 4215 Collective on MPI_Comm 4216 4217 Input Parameters: 4218 + comm - MPI communicator 4219 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4220 This value should be the same as the local size used in creating the 4221 y vector for the matrix-vector product y = Ax. 4222 . n - This value should be the same as the local size used in creating the 4223 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4224 calculated if N is given) For square matrices n is almost always m. 4225 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4226 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4227 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4228 (same value is used for all local rows) 4229 . d_nnz - array containing the number of nonzeros in the various rows of the 4230 DIAGONAL portion of the local submatrix (possibly different for each row) 4231 or NULL, if d_nz is used to specify the nonzero structure. 4232 The size of this array is equal to the number of local rows, i.e 'm'. 4233 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4234 submatrix (same value is used for all local rows). 4235 - o_nnz - array containing the number of nonzeros in the various rows of the 4236 OFF-DIAGONAL portion of the local submatrix (possibly different for 4237 each row) or NULL, if o_nz is used to specify the nonzero 4238 structure. The size of this array is equal to the number 4239 of local rows, i.e 'm'. 4240 4241 Output Parameter: 4242 . A - the matrix 4243 4244 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4245 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4246 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4247 4248 Notes: 4249 If the *_nnz parameter is given then the *_nz parameter is ignored 4250 4251 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4252 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4253 storage requirements for this matrix. 4254 4255 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4256 processor than it must be used on all processors that share the object for 4257 that argument. 4258 4259 The user MUST specify either the local or global matrix dimensions 4260 (possibly both). 4261 4262 The parallel matrix is partitioned across processors such that the 4263 first m0 rows belong to process 0, the next m1 rows belong to 4264 process 1, the next m2 rows belong to process 2 etc.. where 4265 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4266 values corresponding to [m x N] submatrix. 4267 4268 The columns are logically partitioned with the n0 columns belonging 4269 to 0th partition, the next n1 columns belonging to the next 4270 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4271 4272 The DIAGONAL portion of the local submatrix on any given processor 4273 is the submatrix corresponding to the rows and columns m,n 4274 corresponding to the given processor. i.e diagonal matrix on 4275 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4276 etc. The remaining portion of the local submatrix [m x (N-n)] 4277 constitute the OFF-DIAGONAL portion. The example below better 4278 illustrates this concept. 4279 4280 For a square global matrix we define each processor's diagonal portion 4281 to be its local rows and the corresponding columns (a square submatrix); 4282 each processor's off-diagonal portion encompasses the remainder of the 4283 local matrix (a rectangular submatrix). 4284 4285 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4286 4287 When calling this routine with a single process communicator, a matrix of 4288 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4289 type of communicator, use the construction mechanism 4290 .vb 4291 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4292 .ve 4293 4294 $ MatCreate(...,&A); 4295 $ MatSetType(A,MATMPIAIJ); 4296 $ MatSetSizes(A, m,n,M,N); 4297 $ MatMPIAIJSetPreallocation(A,...); 4298 4299 By default, this format uses inodes (identical nodes) when possible. 4300 We search for consecutive rows with the same nonzero structure, thereby 4301 reusing matrix information to achieve increased efficiency. 4302 4303 Options Database Keys: 4304 + -mat_no_inode - Do not use inodes 4305 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4306 4307 4308 4309 Example usage: 4310 4311 Consider the following 8x8 matrix with 34 non-zero values, that is 4312 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4313 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4314 as follows 4315 4316 .vb 4317 1 2 0 | 0 3 0 | 0 4 4318 Proc0 0 5 6 | 7 0 0 | 8 0 4319 9 0 10 | 11 0 0 | 12 0 4320 ------------------------------------- 4321 13 0 14 | 15 16 17 | 0 0 4322 Proc1 0 18 0 | 19 20 21 | 0 0 4323 0 0 0 | 22 23 0 | 24 0 4324 ------------------------------------- 4325 Proc2 25 26 27 | 0 0 28 | 29 0 4326 30 0 0 | 31 32 33 | 0 34 4327 .ve 4328 4329 This can be represented as a collection of submatrices as 4330 4331 .vb 4332 A B C 4333 D E F 4334 G H I 4335 .ve 4336 4337 Where the submatrices A,B,C are owned by proc0, D,E,F are 4338 owned by proc1, G,H,I are owned by proc2. 4339 4340 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4341 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4342 The 'M','N' parameters are 8,8, and have the same values on all procs. 4343 4344 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4345 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4346 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4347 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4348 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4349 matrix, ans [DF] as another SeqAIJ matrix. 4350 4351 When d_nz, o_nz parameters are specified, d_nz storage elements are 4352 allocated for every row of the local diagonal submatrix, and o_nz 4353 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4354 One way to choose d_nz and o_nz is to use the max nonzerors per local 4355 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4356 In this case, the values of d_nz,o_nz are 4357 .vb 4358 proc0 : dnz = 2, o_nz = 2 4359 proc1 : dnz = 3, o_nz = 2 4360 proc2 : dnz = 1, o_nz = 4 4361 .ve 4362 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4363 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4364 for proc3. i.e we are using 12+15+10=37 storage locations to store 4365 34 values. 4366 4367 When d_nnz, o_nnz parameters are specified, the storage is specified 4368 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4369 In the above case the values for d_nnz,o_nnz are 4370 .vb 4371 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4372 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4373 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4374 .ve 4375 Here the space allocated is sum of all the above values i.e 34, and 4376 hence pre-allocation is perfect. 4377 4378 Level: intermediate 4379 4380 .keywords: matrix, aij, compressed row, sparse, parallel 4381 4382 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4383 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4384 @*/ 4385 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4386 { 4387 PetscErrorCode ierr; 4388 PetscMPIInt size; 4389 4390 PetscFunctionBegin; 4391 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4392 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4393 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4394 if (size > 1) { 4395 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4396 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4397 } else { 4398 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4399 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4400 } 4401 PetscFunctionReturn(0); 4402 } 4403 4404 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4405 { 4406 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4407 PetscBool flg; 4408 PetscErrorCode ierr; 4409 4410 PetscFunctionBegin; 4411 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4412 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4413 if (Ad) *Ad = a->A; 4414 if (Ao) *Ao = a->B; 4415 if (colmap) *colmap = a->garray; 4416 PetscFunctionReturn(0); 4417 } 4418 4419 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4420 { 4421 PetscErrorCode ierr; 4422 PetscInt m,N,i,rstart,nnz,Ii; 4423 PetscInt *indx; 4424 PetscScalar *values; 4425 4426 PetscFunctionBegin; 4427 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4428 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4429 PetscInt *dnz,*onz,sum,bs,cbs; 4430 4431 if (n == PETSC_DECIDE) { 4432 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4433 } 4434 /* Check sum(n) = N */ 4435 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4436 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4437 4438 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4439 rstart -= m; 4440 4441 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4442 for (i=0; i<m; i++) { 4443 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4444 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4445 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4446 } 4447 4448 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4449 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4450 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4451 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4452 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4453 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4454 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4455 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4456 } 4457 4458 /* numeric phase */ 4459 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4460 for (i=0; i<m; i++) { 4461 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4462 Ii = i + rstart; 4463 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4464 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4465 } 4466 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4467 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4468 PetscFunctionReturn(0); 4469 } 4470 4471 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4472 { 4473 PetscErrorCode ierr; 4474 PetscMPIInt rank; 4475 PetscInt m,N,i,rstart,nnz; 4476 size_t len; 4477 const PetscInt *indx; 4478 PetscViewer out; 4479 char *name; 4480 Mat B; 4481 const PetscScalar *values; 4482 4483 PetscFunctionBegin; 4484 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4485 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4486 /* Should this be the type of the diagonal block of A? */ 4487 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4488 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4489 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4490 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4491 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4492 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4493 for (i=0; i<m; i++) { 4494 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4495 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4496 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4497 } 4498 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4499 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4500 4501 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4502 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4503 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4504 sprintf(name,"%s.%d",outfile,rank); 4505 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4506 ierr = PetscFree(name);CHKERRQ(ierr); 4507 ierr = MatView(B,out);CHKERRQ(ierr); 4508 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4509 ierr = MatDestroy(&B);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4514 { 4515 PetscErrorCode ierr; 4516 Mat_Merge_SeqsToMPI *merge; 4517 PetscContainer container; 4518 4519 PetscFunctionBegin; 4520 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4521 if (container) { 4522 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4523 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4524 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4535 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4536 ierr = PetscFree(merge);CHKERRQ(ierr); 4537 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4538 } 4539 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4540 PetscFunctionReturn(0); 4541 } 4542 4543 #include <../src/mat/utils/freespace.h> 4544 #include <petscbt.h> 4545 4546 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4547 { 4548 PetscErrorCode ierr; 4549 MPI_Comm comm; 4550 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4551 PetscMPIInt size,rank,taga,*len_s; 4552 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4553 PetscInt proc,m; 4554 PetscInt **buf_ri,**buf_rj; 4555 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4556 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4557 MPI_Request *s_waits,*r_waits; 4558 MPI_Status *status; 4559 MatScalar *aa=a->a; 4560 MatScalar **abuf_r,*ba_i; 4561 Mat_Merge_SeqsToMPI *merge; 4562 PetscContainer container; 4563 4564 PetscFunctionBegin; 4565 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4566 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4567 4568 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4570 4571 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4572 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4573 4574 bi = merge->bi; 4575 bj = merge->bj; 4576 buf_ri = merge->buf_ri; 4577 buf_rj = merge->buf_rj; 4578 4579 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4580 owners = merge->rowmap->range; 4581 len_s = merge->len_s; 4582 4583 /* send and recv matrix values */ 4584 /*-----------------------------*/ 4585 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4586 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4587 4588 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4589 for (proc=0,k=0; proc<size; proc++) { 4590 if (!len_s[proc]) continue; 4591 i = owners[proc]; 4592 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4593 k++; 4594 } 4595 4596 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4597 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4598 ierr = PetscFree(status);CHKERRQ(ierr); 4599 4600 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4601 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4602 4603 /* insert mat values of mpimat */ 4604 /*----------------------------*/ 4605 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4606 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4607 4608 for (k=0; k<merge->nrecv; k++) { 4609 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4610 nrows = *(buf_ri_k[k]); 4611 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4612 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4613 } 4614 4615 /* set values of ba */ 4616 m = merge->rowmap->n; 4617 for (i=0; i<m; i++) { 4618 arow = owners[rank] + i; 4619 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4620 bnzi = bi[i+1] - bi[i]; 4621 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4622 4623 /* add local non-zero vals of this proc's seqmat into ba */ 4624 anzi = ai[arow+1] - ai[arow]; 4625 aj = a->j + ai[arow]; 4626 aa = a->a + ai[arow]; 4627 nextaj = 0; 4628 for (j=0; nextaj<anzi; j++) { 4629 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4630 ba_i[j] += aa[nextaj++]; 4631 } 4632 } 4633 4634 /* add received vals into ba */ 4635 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4636 /* i-th row */ 4637 if (i == *nextrow[k]) { 4638 anzi = *(nextai[k]+1) - *nextai[k]; 4639 aj = buf_rj[k] + *(nextai[k]); 4640 aa = abuf_r[k] + *(nextai[k]); 4641 nextaj = 0; 4642 for (j=0; nextaj<anzi; j++) { 4643 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4644 ba_i[j] += aa[nextaj++]; 4645 } 4646 } 4647 nextrow[k]++; nextai[k]++; 4648 } 4649 } 4650 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4651 } 4652 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4653 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4654 4655 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4656 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4657 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4658 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4659 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4660 PetscFunctionReturn(0); 4661 } 4662 4663 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4664 { 4665 PetscErrorCode ierr; 4666 Mat B_mpi; 4667 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4668 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4669 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4670 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4671 PetscInt len,proc,*dnz,*onz,bs,cbs; 4672 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4673 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4674 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4675 MPI_Status *status; 4676 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4677 PetscBT lnkbt; 4678 Mat_Merge_SeqsToMPI *merge; 4679 PetscContainer container; 4680 4681 PetscFunctionBegin; 4682 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4683 4684 /* make sure it is a PETSc comm */ 4685 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4686 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4687 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4688 4689 ierr = PetscNew(&merge);CHKERRQ(ierr); 4690 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4691 4692 /* determine row ownership */ 4693 /*---------------------------------------------------------*/ 4694 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4695 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4696 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4697 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4698 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4699 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4700 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4701 4702 m = merge->rowmap->n; 4703 owners = merge->rowmap->range; 4704 4705 /* determine the number of messages to send, their lengths */ 4706 /*---------------------------------------------------------*/ 4707 len_s = merge->len_s; 4708 4709 len = 0; /* length of buf_si[] */ 4710 merge->nsend = 0; 4711 for (proc=0; proc<size; proc++) { 4712 len_si[proc] = 0; 4713 if (proc == rank) { 4714 len_s[proc] = 0; 4715 } else { 4716 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4717 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4718 } 4719 if (len_s[proc]) { 4720 merge->nsend++; 4721 nrows = 0; 4722 for (i=owners[proc]; i<owners[proc+1]; i++) { 4723 if (ai[i+1] > ai[i]) nrows++; 4724 } 4725 len_si[proc] = 2*(nrows+1); 4726 len += len_si[proc]; 4727 } 4728 } 4729 4730 /* determine the number and length of messages to receive for ij-structure */ 4731 /*-------------------------------------------------------------------------*/ 4732 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4733 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4734 4735 /* post the Irecv of j-structure */ 4736 /*-------------------------------*/ 4737 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4738 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4739 4740 /* post the Isend of j-structure */ 4741 /*--------------------------------*/ 4742 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4743 4744 for (proc=0, k=0; proc<size; proc++) { 4745 if (!len_s[proc]) continue; 4746 i = owners[proc]; 4747 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4748 k++; 4749 } 4750 4751 /* receives and sends of j-structure are complete */ 4752 /*------------------------------------------------*/ 4753 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4754 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4755 4756 /* send and recv i-structure */ 4757 /*---------------------------*/ 4758 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4759 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4760 4761 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4762 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4763 for (proc=0,k=0; proc<size; proc++) { 4764 if (!len_s[proc]) continue; 4765 /* form outgoing message for i-structure: 4766 buf_si[0]: nrows to be sent 4767 [1:nrows]: row index (global) 4768 [nrows+1:2*nrows+1]: i-structure index 4769 */ 4770 /*-------------------------------------------*/ 4771 nrows = len_si[proc]/2 - 1; 4772 buf_si_i = buf_si + nrows+1; 4773 buf_si[0] = nrows; 4774 buf_si_i[0] = 0; 4775 nrows = 0; 4776 for (i=owners[proc]; i<owners[proc+1]; i++) { 4777 anzi = ai[i+1] - ai[i]; 4778 if (anzi) { 4779 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4780 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4781 nrows++; 4782 } 4783 } 4784 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4785 k++; 4786 buf_si += len_si[proc]; 4787 } 4788 4789 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4790 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4791 4792 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4793 for (i=0; i<merge->nrecv; i++) { 4794 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4795 } 4796 4797 ierr = PetscFree(len_si);CHKERRQ(ierr); 4798 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4799 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4800 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4801 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4802 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4803 ierr = PetscFree(status);CHKERRQ(ierr); 4804 4805 /* compute a local seq matrix in each processor */ 4806 /*----------------------------------------------*/ 4807 /* allocate bi array and free space for accumulating nonzero column info */ 4808 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4809 bi[0] = 0; 4810 4811 /* create and initialize a linked list */ 4812 nlnk = N+1; 4813 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4814 4815 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4816 len = ai[owners[rank+1]] - ai[owners[rank]]; 4817 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4818 4819 current_space = free_space; 4820 4821 /* determine symbolic info for each local row */ 4822 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4823 4824 for (k=0; k<merge->nrecv; k++) { 4825 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4826 nrows = *buf_ri_k[k]; 4827 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4828 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4829 } 4830 4831 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4832 len = 0; 4833 for (i=0; i<m; i++) { 4834 bnzi = 0; 4835 /* add local non-zero cols of this proc's seqmat into lnk */ 4836 arow = owners[rank] + i; 4837 anzi = ai[arow+1] - ai[arow]; 4838 aj = a->j + ai[arow]; 4839 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4840 bnzi += nlnk; 4841 /* add received col data into lnk */ 4842 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4843 if (i == *nextrow[k]) { /* i-th row */ 4844 anzi = *(nextai[k]+1) - *nextai[k]; 4845 aj = buf_rj[k] + *nextai[k]; 4846 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4847 bnzi += nlnk; 4848 nextrow[k]++; nextai[k]++; 4849 } 4850 } 4851 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4852 4853 /* if free space is not available, make more free space */ 4854 if (current_space->local_remaining<bnzi) { 4855 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4856 nspacedouble++; 4857 } 4858 /* copy data into free space, then initialize lnk */ 4859 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4860 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4861 4862 current_space->array += bnzi; 4863 current_space->local_used += bnzi; 4864 current_space->local_remaining -= bnzi; 4865 4866 bi[i+1] = bi[i] + bnzi; 4867 } 4868 4869 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4870 4871 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4872 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4873 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4874 4875 /* create symbolic parallel matrix B_mpi */ 4876 /*---------------------------------------*/ 4877 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4878 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4879 if (n==PETSC_DECIDE) { 4880 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4881 } else { 4882 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4883 } 4884 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4885 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4886 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4887 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4888 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4889 4890 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4891 B_mpi->assembled = PETSC_FALSE; 4892 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4893 merge->bi = bi; 4894 merge->bj = bj; 4895 merge->buf_ri = buf_ri; 4896 merge->buf_rj = buf_rj; 4897 merge->coi = NULL; 4898 merge->coj = NULL; 4899 merge->owners_co = NULL; 4900 4901 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4902 4903 /* attach the supporting struct to B_mpi for reuse */ 4904 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4905 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4906 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4907 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4908 *mpimat = B_mpi; 4909 4910 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4911 PetscFunctionReturn(0); 4912 } 4913 4914 /*@C 4915 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4916 matrices from each processor 4917 4918 Collective on MPI_Comm 4919 4920 Input Parameters: 4921 + comm - the communicators the parallel matrix will live on 4922 . seqmat - the input sequential matrices 4923 . m - number of local rows (or PETSC_DECIDE) 4924 . n - number of local columns (or PETSC_DECIDE) 4925 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4926 4927 Output Parameter: 4928 . mpimat - the parallel matrix generated 4929 4930 Level: advanced 4931 4932 Notes: 4933 The dimensions of the sequential matrix in each processor MUST be the same. 4934 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4935 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4936 @*/ 4937 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4938 { 4939 PetscErrorCode ierr; 4940 PetscMPIInt size; 4941 4942 PetscFunctionBegin; 4943 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4944 if (size == 1) { 4945 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4946 if (scall == MAT_INITIAL_MATRIX) { 4947 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4948 } else { 4949 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4950 } 4951 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4952 PetscFunctionReturn(0); 4953 } 4954 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4955 if (scall == MAT_INITIAL_MATRIX) { 4956 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4957 } 4958 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4959 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4960 PetscFunctionReturn(0); 4961 } 4962 4963 /*@ 4964 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4965 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4966 with MatGetSize() 4967 4968 Not Collective 4969 4970 Input Parameters: 4971 + A - the matrix 4972 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4973 4974 Output Parameter: 4975 . A_loc - the local sequential matrix generated 4976 4977 Level: developer 4978 4979 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4980 4981 @*/ 4982 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4983 { 4984 PetscErrorCode ierr; 4985 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4986 Mat_SeqAIJ *mat,*a,*b; 4987 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4988 MatScalar *aa,*ba,*cam; 4989 PetscScalar *ca; 4990 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4991 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4992 PetscBool match; 4993 MPI_Comm comm; 4994 PetscMPIInt size; 4995 4996 PetscFunctionBegin; 4997 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4998 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4999 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5000 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5001 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5002 5003 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5004 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5005 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5006 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5007 aa = a->a; ba = b->a; 5008 if (scall == MAT_INITIAL_MATRIX) { 5009 if (size == 1) { 5010 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5011 PetscFunctionReturn(0); 5012 } 5013 5014 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5015 ci[0] = 0; 5016 for (i=0; i<am; i++) { 5017 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5018 } 5019 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5020 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5021 k = 0; 5022 for (i=0; i<am; i++) { 5023 ncols_o = bi[i+1] - bi[i]; 5024 ncols_d = ai[i+1] - ai[i]; 5025 /* off-diagonal portion of A */ 5026 for (jo=0; jo<ncols_o; jo++) { 5027 col = cmap[*bj]; 5028 if (col >= cstart) break; 5029 cj[k] = col; bj++; 5030 ca[k++] = *ba++; 5031 } 5032 /* diagonal portion of A */ 5033 for (j=0; j<ncols_d; j++) { 5034 cj[k] = cstart + *aj++; 5035 ca[k++] = *aa++; 5036 } 5037 /* off-diagonal portion of A */ 5038 for (j=jo; j<ncols_o; j++) { 5039 cj[k] = cmap[*bj++]; 5040 ca[k++] = *ba++; 5041 } 5042 } 5043 /* put together the new matrix */ 5044 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5045 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5046 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5047 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5048 mat->free_a = PETSC_TRUE; 5049 mat->free_ij = PETSC_TRUE; 5050 mat->nonew = 0; 5051 } else if (scall == MAT_REUSE_MATRIX) { 5052 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5053 ci = mat->i; cj = mat->j; cam = mat->a; 5054 for (i=0; i<am; i++) { 5055 /* off-diagonal portion of A */ 5056 ncols_o = bi[i+1] - bi[i]; 5057 for (jo=0; jo<ncols_o; jo++) { 5058 col = cmap[*bj]; 5059 if (col >= cstart) break; 5060 *cam++ = *ba++; bj++; 5061 } 5062 /* diagonal portion of A */ 5063 ncols_d = ai[i+1] - ai[i]; 5064 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5065 /* off-diagonal portion of A */ 5066 for (j=jo; j<ncols_o; j++) { 5067 *cam++ = *ba++; bj++; 5068 } 5069 } 5070 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5071 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5072 PetscFunctionReturn(0); 5073 } 5074 5075 /*@C 5076 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5077 5078 Not Collective 5079 5080 Input Parameters: 5081 + A - the matrix 5082 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5083 - row, col - index sets of rows and columns to extract (or NULL) 5084 5085 Output Parameter: 5086 . A_loc - the local sequential matrix generated 5087 5088 Level: developer 5089 5090 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5091 5092 @*/ 5093 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5094 { 5095 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5096 PetscErrorCode ierr; 5097 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5098 IS isrowa,iscola; 5099 Mat *aloc; 5100 PetscBool match; 5101 5102 PetscFunctionBegin; 5103 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5104 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5105 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5106 if (!row) { 5107 start = A->rmap->rstart; end = A->rmap->rend; 5108 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5109 } else { 5110 isrowa = *row; 5111 } 5112 if (!col) { 5113 start = A->cmap->rstart; 5114 cmap = a->garray; 5115 nzA = a->A->cmap->n; 5116 nzB = a->B->cmap->n; 5117 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5118 ncols = 0; 5119 for (i=0; i<nzB; i++) { 5120 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5121 else break; 5122 } 5123 imark = i; 5124 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5125 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5126 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5127 } else { 5128 iscola = *col; 5129 } 5130 if (scall != MAT_INITIAL_MATRIX) { 5131 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5132 aloc[0] = *A_loc; 5133 } 5134 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5135 if (!col) { /* attach global id of condensed columns */ 5136 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5137 } 5138 *A_loc = aloc[0]; 5139 ierr = PetscFree(aloc);CHKERRQ(ierr); 5140 if (!row) { 5141 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5142 } 5143 if (!col) { 5144 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5145 } 5146 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 /*@C 5151 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5152 5153 Collective on Mat 5154 5155 Input Parameters: 5156 + A,B - the matrices in mpiaij format 5157 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5158 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5159 5160 Output Parameter: 5161 + rowb, colb - index sets of rows and columns of B to extract 5162 - B_seq - the sequential matrix generated 5163 5164 Level: developer 5165 5166 @*/ 5167 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5168 { 5169 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5170 PetscErrorCode ierr; 5171 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5172 IS isrowb,iscolb; 5173 Mat *bseq=NULL; 5174 5175 PetscFunctionBegin; 5176 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5177 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5178 } 5179 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5180 5181 if (scall == MAT_INITIAL_MATRIX) { 5182 start = A->cmap->rstart; 5183 cmap = a->garray; 5184 nzA = a->A->cmap->n; 5185 nzB = a->B->cmap->n; 5186 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5187 ncols = 0; 5188 for (i=0; i<nzB; i++) { /* row < local row index */ 5189 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5190 else break; 5191 } 5192 imark = i; 5193 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5194 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5195 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5196 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5197 } else { 5198 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5199 isrowb = *rowb; iscolb = *colb; 5200 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5201 bseq[0] = *B_seq; 5202 } 5203 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5204 *B_seq = bseq[0]; 5205 ierr = PetscFree(bseq);CHKERRQ(ierr); 5206 if (!rowb) { 5207 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5208 } else { 5209 *rowb = isrowb; 5210 } 5211 if (!colb) { 5212 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5213 } else { 5214 *colb = iscolb; 5215 } 5216 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5217 PetscFunctionReturn(0); 5218 } 5219 5220 /* 5221 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5222 of the OFF-DIAGONAL portion of local A 5223 5224 Collective on Mat 5225 5226 Input Parameters: 5227 + A,B - the matrices in mpiaij format 5228 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5229 5230 Output Parameter: 5231 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5232 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5233 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5234 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5235 5236 Level: developer 5237 5238 */ 5239 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5240 { 5241 VecScatter_MPI_General *gen_to,*gen_from; 5242 PetscErrorCode ierr; 5243 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5244 Mat_SeqAIJ *b_oth; 5245 VecScatter ctx; 5246 MPI_Comm comm; 5247 PetscMPIInt *rprocs,*sprocs,tag,rank; 5248 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5249 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5250 PetscScalar *b_otha,*bufa,*bufA,*vals; 5251 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5252 MPI_Request *rwaits = NULL,*swaits = NULL; 5253 MPI_Status *sstatus,rstatus; 5254 PetscMPIInt jj,size; 5255 VecScatterType type; 5256 PetscBool mpi1; 5257 5258 PetscFunctionBegin; 5259 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5260 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5261 5262 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5263 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5264 } 5265 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5266 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5267 5268 if (size == 1) { 5269 startsj_s = NULL; 5270 bufa_ptr = NULL; 5271 *B_oth = NULL; 5272 PetscFunctionReturn(0); 5273 } 5274 5275 ctx = a->Mvctx; 5276 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5277 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5278 if (!mpi1) { 5279 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5280 thus create a->Mvctx_mpi1 */ 5281 if (!a->Mvctx_mpi1) { 5282 a->Mvctx_mpi1_flg = PETSC_TRUE; 5283 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5284 } 5285 ctx = a->Mvctx_mpi1; 5286 } 5287 tag = ((PetscObject)ctx)->tag; 5288 5289 gen_to = (VecScatter_MPI_General*)ctx->todata; 5290 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5291 nrecvs = gen_from->n; 5292 nsends = gen_to->n; 5293 5294 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5295 srow = gen_to->indices; /* local row index to be sent */ 5296 sstarts = gen_to->starts; 5297 sprocs = gen_to->procs; 5298 sstatus = gen_to->sstatus; 5299 sbs = gen_to->bs; 5300 rstarts = gen_from->starts; 5301 rprocs = gen_from->procs; 5302 rbs = gen_from->bs; 5303 5304 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5305 if (scall == MAT_INITIAL_MATRIX) { 5306 /* i-array */ 5307 /*---------*/ 5308 /* post receives */ 5309 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5310 for (i=0; i<nrecvs; i++) { 5311 rowlen = rvalues + rstarts[i]*rbs; 5312 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5313 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* pack the outgoing message */ 5317 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5318 5319 sstartsj[0] = 0; 5320 rstartsj[0] = 0; 5321 len = 0; /* total length of j or a array to be sent */ 5322 k = 0; 5323 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5324 for (i=0; i<nsends; i++) { 5325 rowlen = svalues + sstarts[i]*sbs; 5326 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5327 for (j=0; j<nrows; j++) { 5328 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5329 for (l=0; l<sbs; l++) { 5330 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5331 5332 rowlen[j*sbs+l] = ncols; 5333 5334 len += ncols; 5335 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5336 } 5337 k++; 5338 } 5339 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5340 5341 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5342 } 5343 /* recvs and sends of i-array are completed */ 5344 i = nrecvs; 5345 while (i--) { 5346 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5347 } 5348 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5349 ierr = PetscFree(svalues);CHKERRQ(ierr); 5350 5351 /* allocate buffers for sending j and a arrays */ 5352 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5353 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5354 5355 /* create i-array of B_oth */ 5356 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5357 5358 b_othi[0] = 0; 5359 len = 0; /* total length of j or a array to be received */ 5360 k = 0; 5361 for (i=0; i<nrecvs; i++) { 5362 rowlen = rvalues + rstarts[i]*rbs; 5363 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5364 for (j=0; j<nrows; j++) { 5365 b_othi[k+1] = b_othi[k] + rowlen[j]; 5366 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5367 k++; 5368 } 5369 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5370 } 5371 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5372 5373 /* allocate space for j and a arrrays of B_oth */ 5374 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5375 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5376 5377 /* j-array */ 5378 /*---------*/ 5379 /* post receives of j-array */ 5380 for (i=0; i<nrecvs; i++) { 5381 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5382 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5383 } 5384 5385 /* pack the outgoing message j-array */ 5386 k = 0; 5387 for (i=0; i<nsends; i++) { 5388 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5389 bufJ = bufj+sstartsj[i]; 5390 for (j=0; j<nrows; j++) { 5391 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5392 for (ll=0; ll<sbs; ll++) { 5393 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5394 for (l=0; l<ncols; l++) { 5395 *bufJ++ = cols[l]; 5396 } 5397 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5398 } 5399 } 5400 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5401 } 5402 5403 /* recvs and sends of j-array are completed */ 5404 i = nrecvs; 5405 while (i--) { 5406 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5407 } 5408 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5409 } else if (scall == MAT_REUSE_MATRIX) { 5410 sstartsj = *startsj_s; 5411 rstartsj = *startsj_r; 5412 bufa = *bufa_ptr; 5413 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5414 b_otha = b_oth->a; 5415 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5416 5417 /* a-array */ 5418 /*---------*/ 5419 /* post receives of a-array */ 5420 for (i=0; i<nrecvs; i++) { 5421 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5422 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5423 } 5424 5425 /* pack the outgoing message a-array */ 5426 k = 0; 5427 for (i=0; i<nsends; i++) { 5428 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5429 bufA = bufa+sstartsj[i]; 5430 for (j=0; j<nrows; j++) { 5431 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5432 for (ll=0; ll<sbs; ll++) { 5433 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5434 for (l=0; l<ncols; l++) { 5435 *bufA++ = vals[l]; 5436 } 5437 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5438 } 5439 } 5440 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5441 } 5442 /* recvs and sends of a-array are completed */ 5443 i = nrecvs; 5444 while (i--) { 5445 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5446 } 5447 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5448 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5449 5450 if (scall == MAT_INITIAL_MATRIX) { 5451 /* put together the new matrix */ 5452 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5453 5454 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5455 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5456 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5457 b_oth->free_a = PETSC_TRUE; 5458 b_oth->free_ij = PETSC_TRUE; 5459 b_oth->nonew = 0; 5460 5461 ierr = PetscFree(bufj);CHKERRQ(ierr); 5462 if (!startsj_s || !bufa_ptr) { 5463 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5464 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5465 } else { 5466 *startsj_s = sstartsj; 5467 *startsj_r = rstartsj; 5468 *bufa_ptr = bufa; 5469 } 5470 } 5471 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5472 PetscFunctionReturn(0); 5473 } 5474 5475 /*@C 5476 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5477 5478 Not Collective 5479 5480 Input Parameters: 5481 . A - The matrix in mpiaij format 5482 5483 Output Parameter: 5484 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5485 . colmap - A map from global column index to local index into lvec 5486 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5487 5488 Level: developer 5489 5490 @*/ 5491 #if defined(PETSC_USE_CTABLE) 5492 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5493 #else 5494 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5495 #endif 5496 { 5497 Mat_MPIAIJ *a; 5498 5499 PetscFunctionBegin; 5500 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5501 PetscValidPointer(lvec, 2); 5502 PetscValidPointer(colmap, 3); 5503 PetscValidPointer(multScatter, 4); 5504 a = (Mat_MPIAIJ*) A->data; 5505 if (lvec) *lvec = a->lvec; 5506 if (colmap) *colmap = a->colmap; 5507 if (multScatter) *multScatter = a->Mvctx; 5508 PetscFunctionReturn(0); 5509 } 5510 5511 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5512 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5513 #if defined(PETSC_HAVE_MKL_SPARSE) 5514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5515 #endif 5516 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5517 #if defined(PETSC_HAVE_ELEMENTAL) 5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5519 #endif 5520 #if defined(PETSC_HAVE_HYPRE) 5521 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5522 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5523 #endif 5524 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5526 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5527 5528 /* 5529 Computes (B'*A')' since computing B*A directly is untenable 5530 5531 n p p 5532 ( ) ( ) ( ) 5533 m ( A ) * n ( B ) = m ( C ) 5534 ( ) ( ) ( ) 5535 5536 */ 5537 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5538 { 5539 PetscErrorCode ierr; 5540 Mat At,Bt,Ct; 5541 5542 PetscFunctionBegin; 5543 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5544 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5545 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5546 ierr = MatDestroy(&At);CHKERRQ(ierr); 5547 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5548 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5549 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5550 PetscFunctionReturn(0); 5551 } 5552 5553 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5554 { 5555 PetscErrorCode ierr; 5556 PetscInt m=A->rmap->n,n=B->cmap->n; 5557 Mat Cmat; 5558 5559 PetscFunctionBegin; 5560 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5561 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5562 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5563 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5564 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5565 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5566 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5567 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5568 5569 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5570 5571 *C = Cmat; 5572 PetscFunctionReturn(0); 5573 } 5574 5575 /* ----------------------------------------------------------------*/ 5576 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5577 { 5578 PetscErrorCode ierr; 5579 5580 PetscFunctionBegin; 5581 if (scall == MAT_INITIAL_MATRIX) { 5582 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5583 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5584 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5585 } 5586 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5587 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5588 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5589 PetscFunctionReturn(0); 5590 } 5591 5592 /*MC 5593 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5594 5595 Options Database Keys: 5596 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5597 5598 Level: beginner 5599 5600 .seealso: MatCreateAIJ() 5601 M*/ 5602 5603 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5604 { 5605 Mat_MPIAIJ *b; 5606 PetscErrorCode ierr; 5607 PetscMPIInt size; 5608 5609 PetscFunctionBegin; 5610 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5611 5612 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5613 B->data = (void*)b; 5614 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5615 B->assembled = PETSC_FALSE; 5616 B->insertmode = NOT_SET_VALUES; 5617 b->size = size; 5618 5619 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5620 5621 /* build cache for off array entries formed */ 5622 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5623 5624 b->donotstash = PETSC_FALSE; 5625 b->colmap = 0; 5626 b->garray = 0; 5627 b->roworiented = PETSC_TRUE; 5628 5629 /* stuff used for matrix vector multiply */ 5630 b->lvec = NULL; 5631 b->Mvctx = NULL; 5632 5633 /* stuff for MatGetRow() */ 5634 b->rowindices = 0; 5635 b->rowvalues = 0; 5636 b->getrowactive = PETSC_FALSE; 5637 5638 /* flexible pointer used in CUSP/CUSPARSE classes */ 5639 b->spptr = NULL; 5640 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5649 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5650 #if defined(PETSC_HAVE_MKL_SPARSE) 5651 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5652 #endif 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5655 #if defined(PETSC_HAVE_ELEMENTAL) 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5657 #endif 5658 #if defined(PETSC_HAVE_HYPRE) 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5660 #endif 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5666 #if defined(PETSC_HAVE_HYPRE) 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5668 #endif 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5670 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5671 PetscFunctionReturn(0); 5672 } 5673 5674 /*@C 5675 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5676 and "off-diagonal" part of the matrix in CSR format. 5677 5678 Collective on MPI_Comm 5679 5680 Input Parameters: 5681 + comm - MPI communicator 5682 . m - number of local rows (Cannot be PETSC_DECIDE) 5683 . n - This value should be the same as the local size used in creating the 5684 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5685 calculated if N is given) For square matrices n is almost always m. 5686 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5687 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5688 . i - row indices for "diagonal" portion of matrix 5689 . j - column indices 5690 . a - matrix values 5691 . oi - row indices for "off-diagonal" portion of matrix 5692 . oj - column indices 5693 - oa - matrix values 5694 5695 Output Parameter: 5696 . mat - the matrix 5697 5698 Level: advanced 5699 5700 Notes: 5701 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5702 must free the arrays once the matrix has been destroyed and not before. 5703 5704 The i and j indices are 0 based 5705 5706 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5707 5708 This sets local rows and cannot be used to set off-processor values. 5709 5710 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5711 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5712 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5713 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5714 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5715 communication if it is known that only local entries will be set. 5716 5717 .keywords: matrix, aij, compressed row, sparse, parallel 5718 5719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5720 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5721 @*/ 5722 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5723 { 5724 PetscErrorCode ierr; 5725 Mat_MPIAIJ *maij; 5726 5727 PetscFunctionBegin; 5728 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5729 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5730 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5731 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5732 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5733 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5734 maij = (Mat_MPIAIJ*) (*mat)->data; 5735 5736 (*mat)->preallocated = PETSC_TRUE; 5737 5738 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5739 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5740 5741 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5742 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5743 5744 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5745 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5747 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5748 5749 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5750 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5752 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5753 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5754 PetscFunctionReturn(0); 5755 } 5756 5757 /* 5758 Special version for direct calls from Fortran 5759 */ 5760 #include <petsc/private/fortranimpl.h> 5761 5762 /* Change these macros so can be used in void function */ 5763 #undef CHKERRQ 5764 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5765 #undef SETERRQ2 5766 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5767 #undef SETERRQ3 5768 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5769 #undef SETERRQ 5770 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5771 5772 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5773 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5774 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5775 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5776 #else 5777 #endif 5778 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5779 { 5780 Mat mat = *mmat; 5781 PetscInt m = *mm, n = *mn; 5782 InsertMode addv = *maddv; 5783 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5784 PetscScalar value; 5785 PetscErrorCode ierr; 5786 5787 MatCheckPreallocated(mat,1); 5788 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5789 5790 #if defined(PETSC_USE_DEBUG) 5791 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5792 #endif 5793 { 5794 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5795 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5796 PetscBool roworiented = aij->roworiented; 5797 5798 /* Some Variables required in the macro */ 5799 Mat A = aij->A; 5800 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5801 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5802 MatScalar *aa = a->a; 5803 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5804 Mat B = aij->B; 5805 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5806 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5807 MatScalar *ba = b->a; 5808 5809 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5810 PetscInt nonew = a->nonew; 5811 MatScalar *ap1,*ap2; 5812 5813 PetscFunctionBegin; 5814 for (i=0; i<m; i++) { 5815 if (im[i] < 0) continue; 5816 #if defined(PETSC_USE_DEBUG) 5817 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5818 #endif 5819 if (im[i] >= rstart && im[i] < rend) { 5820 row = im[i] - rstart; 5821 lastcol1 = -1; 5822 rp1 = aj + ai[row]; 5823 ap1 = aa + ai[row]; 5824 rmax1 = aimax[row]; 5825 nrow1 = ailen[row]; 5826 low1 = 0; 5827 high1 = nrow1; 5828 lastcol2 = -1; 5829 rp2 = bj + bi[row]; 5830 ap2 = ba + bi[row]; 5831 rmax2 = bimax[row]; 5832 nrow2 = bilen[row]; 5833 low2 = 0; 5834 high2 = nrow2; 5835 5836 for (j=0; j<n; j++) { 5837 if (roworiented) value = v[i*n+j]; 5838 else value = v[i+j*m]; 5839 if (in[j] >= cstart && in[j] < cend) { 5840 col = in[j] - cstart; 5841 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5842 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5843 } else if (in[j] < 0) continue; 5844 #if defined(PETSC_USE_DEBUG) 5845 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5846 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5847 #endif 5848 else { 5849 if (mat->was_assembled) { 5850 if (!aij->colmap) { 5851 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5852 } 5853 #if defined(PETSC_USE_CTABLE) 5854 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5855 col--; 5856 #else 5857 col = aij->colmap[in[j]] - 1; 5858 #endif 5859 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5860 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5861 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5862 col = in[j]; 5863 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5864 B = aij->B; 5865 b = (Mat_SeqAIJ*)B->data; 5866 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5867 rp2 = bj + bi[row]; 5868 ap2 = ba + bi[row]; 5869 rmax2 = bimax[row]; 5870 nrow2 = bilen[row]; 5871 low2 = 0; 5872 high2 = nrow2; 5873 bm = aij->B->rmap->n; 5874 ba = b->a; 5875 } 5876 } else col = in[j]; 5877 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5878 } 5879 } 5880 } else if (!aij->donotstash) { 5881 if (roworiented) { 5882 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5883 } else { 5884 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5885 } 5886 } 5887 } 5888 } 5889 PetscFunctionReturnVoid(); 5890 } 5891 5892