#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically
    switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
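/*
  Usage sketch (illustrative only, not part of the build): creating an AIJ matrix and
  calling both preallocation routines as the manual pages above recommend, so the same
  code runs unchanged on one or many MPI ranks.  The sizes M, N and the per-row estimates
  (5 diagonal, 2 off-diagonal) are placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/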
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
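/*
  Note: each process accumulates its local contributions into a dense work array of
  global column length and the results are combined with a single MPIU_Allreduce(), so
  the caller's output array must have room for all N global columns on every process.
  Usage sketch (illustrative only):

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/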
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
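/*
  Usage sketch (illustrative only): distribute a square SeqAIJ matrix gmat held on rank 0
  over a communicator, giving each process m local rows; a later call with MAT_REUSE_MATRIX
  moves over only the numerical values from rank 0.

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/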
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
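/*
  Lookup sketch (illustrative only; gcol, col and ierr stand in for the variables used by
  the callers below): both representations store "local index + 1" so that 0 can signal a
  column that is not present.  A global column gcol is translated to a local column of
  the off-diagonal block the same way MatSetValues_MPIAIJ() does it:

  #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
  #else
     col = aij->colmap[gcol] - 1;
  #endif

  leaving col negative when gcol is not in the off-diagonal sparsity pattern.
*/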
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
  { \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value;   \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0);   \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol)     \
  { \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;  \
    ap2[_i] = value;  \
    B->nonzerostate++;  \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }
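/*
  The two macros above share one technique: a bounded binary search over the sorted
  column indices of a row, a short linear scan to finish, then a shift-up insertion when
  the entry is new.  A standalone sketch of the idea (illustrative only; hypothetical
  helper using memmove() from <string.h>, with no reallocation or error handling):

     static void InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *len,PetscInt col,PetscScalar v)
     {
       PetscInt lo = 0,hi = *len,t,i;
       while (hi - lo > 5) {
         t = (lo + hi)/2;
         if (cols[t] > col) hi = t;
         else               lo = t;
       }
       for (i=lo; i<hi; i++) {
         if (cols[i] == col) {vals[i] += v; return;}
         if (cols[i] > col) break;
       }
       memmove(cols+i+1,cols+i,(*len-i)*sizeof(*cols));
       memmove(vals+i+1,vals+i,(*len-i)*sizeof(*vals));
       cols[i] = col; vals[i] = v; (*len)++;
     }
*/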
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value  = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A      = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa    = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B      = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba    = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
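/*
  Usage sketch (illustrative only): values destined for rows owned by other processes are
  stashed above and communicated during assembly, so a caller may set any global entry
  but must finish with the assembly pair:

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/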
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A;   /* diagonal part of the matrix */
  Mat        B      = aij->B;   /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
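/*
  Worked example (illustrative only): with cstart = 4 and cend = 8, a CSR row with global
  columns {1, 5, 9} is split so the diagonal block receives the local column {1} (= 5 - 4,
  giving dnz = 1) while the off-diagonal block keeps the global columns {1, 9} (onz = 2);
  those global columns are compacted to local indices later by MatSetUpMultiply_MPIAIJ().
*/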
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A;   /* diagonal part of the matrix */
  Mat         B      = aij->B;   /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ*)A->data;
  PetscObjectState sA,sB;
  PetscInt         *lrows;
  PetscInt         r,len;
  PetscBool        cong,lch,gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA,nnwB;
    PetscBool  nnzA,nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch  = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
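/*
  Usage sketch (illustrative only): the typical application is eliminating rows (or rows
  and columns) for Dirichlet boundary conditions; with x and b supplied the right-hand
  side is adjusted so the solution retains the prescribed values.  Global row indices are
  mapped to their owners above, so they need not be locally owned.

     ierr = MatZeroRows(A,nrows,grows,1.0,x,b);CHKERRQ(ierr);
     ierr = MatZeroRowsColumns(A,nrows,grows,1.0,x,b);CHKERRQ(ierr);
*/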
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* start gathering the ghost values of xx */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);                                  /* overlap: multiply by the diagonal block */
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);                       /* add the off-diagonal contribution */
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
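/*
  Usage sketch (illustrative only): these are the implementations behind the public
  queries, which a caller reaches as

     PetscBool flg;
     ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
     ierr = MatIsSymmetric(A,1.e-10,&flg);CHKERRQ(ierr);
*/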
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt       *rowlens;
  PetscInt       *colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = B->a[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = A->a[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = B->a[jb];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
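/*
  Usage sketch (illustrative only): this path is reached when the matrix is viewed with a
  binary viewer, e.g.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

  The file can be read back with MatLoad().
*/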
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*)(aij->A->data))->nz + ((Mat_SeqAIJ*)(aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }
entire matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = NULL; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 
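/* added commentary (not library code): the scatter begun just above completes just below; it gathers the off-process entries of xx referenced by the off-diagonal block B into the work vector mat->lvec, which is then used to form the right-hand side update bb1 = bb - B*x */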
ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i]; 1680 PetscMPIInt rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt col = cdest[aj[j]]; 1684 PetscMPIInt cowner; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt col = gcdest[bj[j]]; 1691 PetscMPIInt cowner; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = 
PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = 
irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 case MAT_SORTED_FULL: 1819 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1820 break; 1821 case MAT_IGNORE_OFF_PROC_ENTRIES: 1822 a->donotstash = flg; 1823 break; 1824 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1825 case MAT_SPD: 1826 case MAT_SYMMETRIC: 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 case MAT_HERMITIAN: 1829 case MAT_SYMMETRY_ETERNAL: 1830 break; 1831 case MAT_SUBMAT_SINGLEIS: 1832 A->submat_singleis = flg; 1833 break; 1834 case MAT_STRUCTURE_ONLY: 1835 /* The option is handled directly by MatSetOption() */ 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1844 { 1845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1846 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1847 PetscErrorCode ierr; 1848 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1849 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1850 PetscInt *cmap,*idx_p; 1851 1852 PetscFunctionBegin; 1853 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1854 mat->getrowactive = PETSC_TRUE; 1855 1856 if (!mat->rowvalues && (idx || v)) { 1857 /* 1858 allocate enough space to hold information from the longest row. 
1859 */ 1860 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1861 PetscInt max = 1,tmp; 1862 for (i=0; i<matin->rmap->n; i++) { 1863 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1864 if (max < tmp) max = tmp; 1865 } 1866 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1867 } 1868 1869 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1870 lrow = row - rstart; 1871 1872 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1873 if (!v) {pvA = NULL; pvB = NULL;} 1874 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1875 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1876 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1877 nztot = nzA + nzB; 1878 1879 cmap = mat->garray; 1880 if (v || idx) { 1881 if (nztot) { 1882 /* Sort by increasing column numbers, assuming A and B already sorted */ 1883 PetscInt imark = -1; 1884 if (v) { 1885 *v = v_p = mat->rowvalues; 1886 for (i=0; i<nzB; i++) { 1887 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1888 else break; 1889 } 1890 imark = i; 1891 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1892 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1893 } 1894 if (idx) { 1895 *idx = idx_p = mat->rowindices; 1896 if (imark > -1) { 1897 for (i=0; i<imark; i++) { 1898 idx_p[i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1903 else break; 1904 } 1905 imark = i; 1906 } 1907 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1908 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 if (idx) *idx = NULL; 1912 if (v) *v = NULL; 1913 } 1914 } 1915 *nz = nztot; 1916 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1917 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1922 { 1923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1924 1925 PetscFunctionBegin; 1926 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1927 aij->getrowactive = PETSC_FALSE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1932 { 1933 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1934 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1935 PetscErrorCode ierr; 1936 PetscInt i,j,cstart = mat->cmap->rstart; 1937 PetscReal sum = 0.0; 1938 MatScalar *v; 1939 1940 PetscFunctionBegin; 1941 if (aij->size == 1) { 1942 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1943 } else { 1944 if (type == NORM_FROBENIUS) { 1945 v = amat->a; 1946 for (i=0; i<amat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1960 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1961 *norm = 0.0; 1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1999 { 2000 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2001 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2002 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2003 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2004 PetscErrorCode ierr; 2005 Mat B,A_diag,*B_diag; 2006 const MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 /* the MatAXPY_Basic* subroutines call MatAssembly, so the matrix on the GPU 2237 will be updated */ 2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2239 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2240 Y->offloadmask = PETSC_OFFLOAD_CPU; 2241 } 2242 #endif 2243 } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */ 2244 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2245 } else { 2246 Mat B; 2247 PetscInt *nnz_d,*nnz_o; 2248 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2250 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2252 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2253 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2256 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2257 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2258 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2259 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2260 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2261 } 2262 PetscFunctionReturn(0); 2263 }
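/*
   Added usage sketch (illustrative only, not part of the library source): MatAXPY() computes Y = a*X + Y.
   When both matrices are MATMPIAIJ and share a nonzero pattern, the SAME_NONZERO_PATTERN branch above applies
   BLASaxpy directly to the stored values of the diagonal (A) and off-diagonal (B) blocks; otherwise a new
   matrix is preallocated and Y is header-replaced. The sizes and values below are assumptions made for the
   example, which assumes a standard PETSc program where PetscInitialize() has been called and ierr is declared.

.vb
     Mat      X,Y;
     PetscInt i,rstart,rend;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,1,NULL,0,NULL,&X);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(X,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValue(X,i,i,2.0,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDuplicate(X,MAT_COPY_VALUES,&Y);CHKERRQ(ierr);
     ierr = MatAXPY(Y,3.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatDestroy(&X);CHKERRQ(ierr);
     ierr = MatDestroy(&Y);CHKERRQ(ierr);
.ve
*/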
2264 2265 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2266 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 PetscFunctionReturn(0); 2280 } 2281 2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2283 { 2284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2289 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2300 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 PetscInt i,*idxb = NULL,m = A->rmap->n; 2309 PetscScalar *va,*vv; 2310 Vec vB,vA; 2311 const PetscScalar *vb; 2312 2313 PetscFunctionBegin; 2314 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vA);CHKERRQ(ierr); 2315 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2316 2317 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2318 if (idx) { 2319 for (i=0; i<m; i++) { 2320 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2321 } 2322 } 2323 2324 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vB);CHKERRQ(ierr); 2325 if (idx) { 2326 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2327 } 2328 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2329 2330 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2331 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2332 for (i=0; i<m; i++) { 2333 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2334 vv[i] = vb[i]; 2335 if (idx) idx[i] = a->garray[idxb[i]]; 2336 } else { 2337 vv[i] = va[i]; 2338 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idx[i] > a->garray[idxb[i]]) 2339 idx[i] = a->garray[idxb[i]]; 2340 } 2341 } 2342 ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); 2343 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2344 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2345 ierr = PetscFree(idxb);CHKERRQ(ierr); 2346 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2347 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2348 PetscFunctionReturn(0); 2349 } 2350 2351 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2352 { 2353 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2354 PetscErrorCode ierr; 2355 PetscInt i,*idxb = NULL; 2356 PetscScalar *va,*vb; 2357 Vec vtmp; 2358 2359 PetscFunctionBegin; 2360 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2361 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2362 if (idx) { 2363 for (i=0; i<A->rmap->n; i++) { /* v and idx have local length A->rmap->n */ 2364 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2365 } 2366 } 2367 2368 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2369 if (idx) { 2370 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2371 } 2372 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2373 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2374 2375 for (i=0; i<A->rmap->n; i++) { 2376 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2377 va[i] = vb[i]; 2378 if (idx) idx[i] = a->garray[idxb[i]]; 2379 } 2380 } 2381 2382 ierr =
VecRestoreArray(v,&va);CHKERRQ(ierr); 2383 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2384 ierr = PetscFree(idxb);CHKERRQ(ierr); 2385 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2390 { 2391 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2392 PetscInt m = A->rmap->n,n = A->cmap->n; 2393 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2394 PetscInt *cmap = mat->garray; 2395 PetscInt *diagIdx, *offdiagIdx; 2396 Vec diagV, offdiagV; 2397 PetscScalar *a, *diagA, *offdiagA, *ba; 2398 PetscInt r,j,col,ncols,*bi,*bj; 2399 PetscErrorCode ierr; 2400 Mat B = mat->B; 2401 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2402 2403 PetscFunctionBegin; 2404 /* When a process holds entire A and other processes have no entry */ 2405 if (A->cmap->N == n) { 2406 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2407 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2408 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2409 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2410 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } else if (n == 0) { 2413 if (m) { 2414 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2415 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2416 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2417 } 2418 PetscFunctionReturn(0); 2419 } 2420 2421 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2422 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2423 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2424 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2425 2426 /* Get offdiagIdx[] for implicit 0.0 */ 2427 ba = b->a; 2428 bi = b->i; 2429 bj = b->j; 2430 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2431 for (r = 0; r < m; r++) { 2432 ncols = bi[r+1] - bi[r]; 2433 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2434 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2435 } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower */ 2436 offdiagA[r] = 0.0; 2437 2438 /* Find first hole in the cmap */ 2439 for (j=0; j<ncols; j++) { 2440 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2441 if (col > j && j < cstart) { 2442 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2443 break; 2444 } else if (col > j + n && j >= cstart) { 2445 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2446 break; 2447 } 2448 } 2449 if (j == ncols && B->cmap->N < A->cmap->N - n) { 2450 /* a hole is outside compressed Bcols */ 2451 if (ncols == 0) { 2452 if (cstart) { 2453 offdiagIdx[r] = 0; 2454 } else offdiagIdx[r] = cend; 2455 } else { /* ncols > 0 */ 2456 offdiagIdx[r] = cmap[ncols-1] + 1; 2457 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2458 } 2459 } 2460 } 2461 2462 for (j=0; j<ncols; j++) { 2463 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2464 ba++; bj++; 2465 } 2466 } 2467 2468 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2469 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2470 for (r = 0; r < m; ++r) { 2471 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2472 a[r] = diagA[r]; 2473 if (idx) idx[r] = cstart + diagIdx[r]; 2474 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2475 a[r] = diagA[r]; 2476 if (idx) { 2477 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2478 idx[r] =
cstart + diagIdx[r]; 2479 } else idx[r] = offdiagIdx[r]; 2480 } 2481 } else { 2482 a[r] = offdiagA[r]; 2483 if (idx) idx[r] = offdiagIdx[r]; 2484 } 2485 } 2486 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2487 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2488 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2489 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2490 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2491 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } 2494 2495 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2496 { 2497 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2498 PetscInt m = A->rmap->n,n = A->cmap->n; 2499 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2500 PetscInt *cmap = mat->garray; 2501 PetscInt *diagIdx, *offdiagIdx; 2502 Vec diagV, offdiagV; 2503 PetscScalar *a, *diagA, *offdiagA, *ba; 2504 PetscInt r,j,col,ncols,*bi,*bj; 2505 PetscErrorCode ierr; 2506 Mat B = mat->B; 2507 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2508 2509 PetscFunctionBegin; 2510 /* When a process holds entire A and other processes have no entry */ 2511 if (A->cmap->N == n) { 2512 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2513 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2514 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2515 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2516 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } else if (n == 0) { 2519 if (m) { 2520 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2521 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2522 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2523 } 2524 PetscFunctionReturn(0); 2525 } 2526 2527 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2528 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2529 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2530 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2531 2532 /* Get offdiagIdx[] for implicit 0.0 */ 2533 ba = b->a; 2534 bi = b->i; 2535 bj = b->j; 2536 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2537 for (r = 0; r < m; r++) { 2538 ncols = bi[r+1] - bi[r]; 2539 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2540 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2541 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2542 offdiagA[r] = 0.0; 2543 2544 /* Find first hole in the cmap */ 2545 for (j=0; j<ncols; j++) { 2546 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2547 if (col > j && j < cstart) { 2548 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2549 break; 2550 } else if (col > j + n && j >= cstart) { 2551 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2552 break; 2553 } 2554 } 2555 if (j == ncols && B->cmap->N < A->cmap->N - n) { 2556 /* a hole is outside compressed Bcols */ 2557 if (ncols == 0) { 2558 if (cstart) { 2559 offdiagIdx[r] = 0; 2560 } else offdiagIdx[r] = cend; 2561 } else { /* ncols > 0 */ 2562 offdiagIdx[r] = cmap[ncols-1] + 1; 2563 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2564 } 2565 } 2566 } 2567 2568 for (j=0; j<ncols; j++) { 2569 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2570 ba++; bj++; 2571 } 2572 } 2573 2574 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2575 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2576 
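/* added commentary (not library code): the merge loop below keeps, for each local row, the larger of the diagonal-block and off-diagonal-block candidates; on ties the smaller global column index wins, mirroring MatGetRowMin_MPIAIJ() above */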
for (r = 0; r < m; ++r) { 2577 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2578 a[r] = diagA[r]; 2579 if (idx) idx[r] = cstart + diagIdx[r]; 2580 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2581 a[r] = diagA[r]; 2582 if (idx) { 2583 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2584 idx[r] = cstart + diagIdx[r]; 2585 } else idx[r] = offdiagIdx[r]; 2586 } 2587 } else { 2588 a[r] = offdiagA[r]; 2589 if (idx) idx[r] = offdiagIdx[r]; 2590 } 2591 } 2592 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2593 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2594 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2595 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2596 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2597 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2602 { 2603 PetscErrorCode ierr; 2604 Mat *dummy; 2605 2606 PetscFunctionBegin; 2607 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2608 *newmat = *dummy; 2609 ierr = PetscFree(dummy);CHKERRQ(ierr); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2614 { 2615 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2616 PetscErrorCode ierr; 2617 2618 PetscFunctionBegin; 2619 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2620 A->factorerrortype = a->A->factorerrortype; 2621 PetscFunctionReturn(0); 2622 } 2623 2624 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2625 { 2626 PetscErrorCode ierr; 2627 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2628 2629 PetscFunctionBegin; 2630 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2631 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2632 if (x->assembled) { 2633 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2634 } else { 2635 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2636 } 2637 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2638 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2639 PetscFunctionReturn(0); 2640 } 2641 2642 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2643 { 2644 PetscFunctionBegin; 2645 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2646 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2647 PetscFunctionReturn(0); 2648 } 2649 2650 /*@ 2651 MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap 2652 2653 Collective on Mat 2654 2655 Input Parameters: 2656 + A - the matrix 2657 - sc - PETSC_TRUE indicates use of the scalable algorithm (the default is PETSC_FALSE, i.e. the non-scalable algorithm) 2658 2659 Level: advanced 2660 2661 @*/ 2662 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2663 { 2664 PetscErrorCode ierr; 2665 2666 PetscFunctionBegin; 2667 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2668 PetscFunctionReturn(0); 2669 } 2670 2671 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2672 { 2673 PetscErrorCode ierr; 2674 PetscBool sc = PETSC_FALSE,flg; 2675 2676 PetscFunctionBegin; 2677 ierr =
PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2678 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2679 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2680 if (flg) { 2681 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2682 } 2683 ierr = PetscOptionsTail();CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2688 { 2689 PetscErrorCode ierr; 2690 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2691 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2692 2693 PetscFunctionBegin; 2694 if (!Y->preallocated) { 2695 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2696 } else if (!aij->nz) { 2697 PetscInt nonew = aij->nonew; 2698 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2699 aij->nonew = nonew; 2700 } 2701 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2702 PetscFunctionReturn(0); 2703 } 2704 2705 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2708 PetscErrorCode ierr; 2709 2710 PetscFunctionBegin; 2711 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2712 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2713 if (d) { 2714 PetscInt rstart; 2715 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2716 *d += rstart; 2717 2718 } 2719 PetscFunctionReturn(0); 2720 } 2721 2722 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2723 { 2724 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 /* -------------------------------------------------------------------*/ 2733 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2734 MatGetRow_MPIAIJ, 2735 MatRestoreRow_MPIAIJ, 2736 MatMult_MPIAIJ, 2737 /* 4*/ MatMultAdd_MPIAIJ, 2738 MatMultTranspose_MPIAIJ, 2739 MatMultTransposeAdd_MPIAIJ, 2740 NULL, 2741 NULL, 2742 NULL, 2743 /*10*/ NULL, 2744 NULL, 2745 NULL, 2746 MatSOR_MPIAIJ, 2747 MatTranspose_MPIAIJ, 2748 /*15*/ MatGetInfo_MPIAIJ, 2749 MatEqual_MPIAIJ, 2750 MatGetDiagonal_MPIAIJ, 2751 MatDiagonalScale_MPIAIJ, 2752 MatNorm_MPIAIJ, 2753 /*20*/ MatAssemblyBegin_MPIAIJ, 2754 MatAssemblyEnd_MPIAIJ, 2755 MatSetOption_MPIAIJ, 2756 MatZeroEntries_MPIAIJ, 2757 /*24*/ MatZeroRows_MPIAIJ, 2758 NULL, 2759 NULL, 2760 NULL, 2761 NULL, 2762 /*29*/ MatSetUp_MPIAIJ, 2763 NULL, 2764 NULL, 2765 MatGetDiagonalBlock_MPIAIJ, 2766 NULL, 2767 /*34*/ MatDuplicate_MPIAIJ, 2768 NULL, 2769 NULL, 2770 NULL, 2771 NULL, 2772 /*39*/ MatAXPY_MPIAIJ, 2773 MatCreateSubMatrices_MPIAIJ, 2774 MatIncreaseOverlap_MPIAIJ, 2775 MatGetValues_MPIAIJ, 2776 MatCopy_MPIAIJ, 2777 /*44*/ MatGetRowMax_MPIAIJ, 2778 MatScale_MPIAIJ, 2779 MatShift_MPIAIJ, 2780 MatDiagonalSet_MPIAIJ, 2781 MatZeroRowsColumns_MPIAIJ, 2782 /*49*/ MatSetRandom_MPIAIJ, 2783 NULL, 2784 NULL, 2785 NULL, 2786 NULL, 2787 /*54*/ MatFDColoringCreate_MPIXAIJ, 2788 NULL, 2789 MatSetUnfactored_MPIAIJ, 2790 MatPermute_MPIAIJ, 2791 NULL, 2792 /*59*/ MatCreateSubMatrix_MPIAIJ, 2793 MatDestroy_MPIAIJ, 2794 MatView_MPIAIJ, 2795 NULL, 2796 NULL, 2797 /*64*/ NULL, 2798 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2799 NULL, 2800 NULL, 2801 
NULL, 2802 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2803 MatGetRowMinAbs_MPIAIJ, 2804 NULL, 2805 NULL, 2806 NULL, 2807 NULL, 2808 /*75*/ MatFDColoringApply_AIJ, 2809 MatSetFromOptions_MPIAIJ, 2810 NULL, 2811 NULL, 2812 MatFindZeroDiagonals_MPIAIJ, 2813 /*80*/ NULL, 2814 NULL, 2815 NULL, 2816 /*83*/ MatLoad_MPIAIJ, 2817 MatIsSymmetric_MPIAIJ, 2818 NULL, 2819 NULL, 2820 NULL, 2821 NULL, 2822 /*89*/ NULL, 2823 NULL, 2824 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2825 NULL, 2826 NULL, 2827 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2828 NULL, 2829 NULL, 2830 NULL, 2831 MatBindToCPU_MPIAIJ, 2832 /*99*/ MatProductSetFromOptions_MPIAIJ, 2833 NULL, 2834 NULL, 2835 MatConjugate_MPIAIJ, 2836 NULL, 2837 /*104*/MatSetValuesRow_MPIAIJ, 2838 MatRealPart_MPIAIJ, 2839 MatImaginaryPart_MPIAIJ, 2840 NULL, 2841 NULL, 2842 /*109*/NULL, 2843 NULL, 2844 MatGetRowMin_MPIAIJ, 2845 NULL, 2846 MatMissingDiagonal_MPIAIJ, 2847 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2848 NULL, 2849 MatGetGhosts_MPIAIJ, 2850 NULL, 2851 NULL, 2852 /*119*/NULL, 2853 NULL, 2854 NULL, 2855 NULL, 2856 MatGetMultiProcBlock_MPIAIJ, 2857 /*124*/MatFindNonzeroRows_MPIAIJ, 2858 MatGetColumnNorms_MPIAIJ, 2859 MatInvertBlockDiagonal_MPIAIJ, 2860 MatInvertVariableBlockDiagonal_MPIAIJ, 2861 MatCreateSubMatricesMPI_MPIAIJ, 2862 /*129*/NULL, 2863 NULL, 2864 NULL, 2865 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2866 NULL, 2867 /*134*/NULL, 2868 NULL, 2869 NULL, 2870 NULL, 2871 NULL, 2872 /*139*/MatSetBlockSizes_MPIAIJ, 2873 NULL, 2874 NULL, 2875 MatFDColoringSetUp_MPIXAIJ, 2876 MatFindOffBlockDiagonalEntries_MPIAIJ, 2877 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2878 /*145*/NULL, 2879 NULL, 2880 NULL 2881 }; 2882 2883 /* ----------------------------------------------------------------------------------------*/ 2884 2885 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2886 { 2887 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2888 PetscErrorCode ierr; 2889 2890 PetscFunctionBegin; 2891 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2892 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2893 PetscFunctionReturn(0); 2894 } 2895 2896 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2897 { 2898 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2899 PetscErrorCode ierr; 2900 2901 PetscFunctionBegin; 2902 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2903 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2904 PetscFunctionReturn(0); 2905 } 2906 2907 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2908 { 2909 Mat_MPIAIJ *b; 2910 PetscErrorCode ierr; 2911 PetscMPIInt size; 2912 2913 PetscFunctionBegin; 2914 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2915 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2916 b = (Mat_MPIAIJ*)B->data; 2917 2918 #if defined(PETSC_USE_CTABLE) 2919 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2920 #else 2921 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2922 #endif 2923 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2924 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2925 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2926 2927 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2928 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2929 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2930 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2931 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2932 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2933 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2934 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2935 2936 if (!B->preallocated) { 2937 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2938 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2939 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2940 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2941 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2942 } 2943 2944 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2945 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2946 B->preallocated = PETSC_TRUE; 2947 B->was_assembled = PETSC_FALSE; 2948 B->assembled = PETSC_FALSE; 2949 PetscFunctionReturn(0); 2950 } 2951 2952 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2953 { 2954 Mat_MPIAIJ *b; 2955 PetscErrorCode ierr; 2956 2957 PetscFunctionBegin; 2958 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2959 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2960 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2961 b = (Mat_MPIAIJ*)B->data; 2962 2963 #if defined(PETSC_USE_CTABLE) 2964 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2965 #else 2966 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2967 #endif 2968 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2969 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2970 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2971 2972 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2973 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2974 B->preallocated = PETSC_TRUE; 2975 B->was_assembled = PETSC_FALSE; 2976 B->assembled = PETSC_FALSE; 2977 PetscFunctionReturn(0); 2978 } 2979 2980 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2981 { 2982 Mat mat; 2983 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 *newmat = NULL; 2988 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2989 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2990 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2991 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2992 a = (Mat_MPIAIJ*)mat->data; 2993 2994 mat->factortype = matin->factortype; 2995 mat->assembled = matin->assembled; 2996 mat->insertmode = NOT_SET_VALUES; 2997 mat->preallocated = matin->preallocated; 2998 2999 a->size = oldmat->size; 3000 a->rank = oldmat->rank; 3001 a->donotstash = oldmat->donotstash; 3002 a->roworiented = oldmat->roworiented; 3003 a->rowindices = NULL; 3004 a->rowvalues = NULL; 3005 a->getrowactive = PETSC_FALSE; 3006 3007 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3008 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3009 3010 if (oldmat->colmap) { 3011 #if defined(PETSC_USE_CTABLE) 3012 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3013 #else 3014 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3015 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3016 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3017 #endif 3018 } else a->colmap = NULL; 3019 if (oldmat->garray) { 3020 PetscInt len; 3021 len = oldmat->B->cmap->n; 3022 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3023 
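/* added commentary (not library code): garray maps the compressed local column indices of the off-diagonal block B to global column numbers; the duplicate receives its own copy so the two matrices can be modified independently */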
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3024 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3025 } else a->garray = NULL; 3026 3027 /* It may happen MatDuplicate is called with a non-assembled matrix 3028 In fact, MatDuplicate only requires the matrix to be preallocated 3029 This may happen inside a DMCreateMatrix_Shell */ 3030 if (oldmat->lvec) { 3031 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3032 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3033 } 3034 if (oldmat->Mvctx) { 3035 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3036 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3037 } 3038 if (oldmat->Mvctx_mpi1) { 3039 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 3040 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 3041 } 3042 3043 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3044 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3045 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3046 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3047 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3048 *newmat = mat; 3049 PetscFunctionReturn(0); 3050 } 3051 3052 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3053 { 3054 PetscBool isbinary, ishdf5; 3055 PetscErrorCode ierr; 3056 3057 PetscFunctionBegin; 3058 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3059 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3060 /* force binary viewer to load .info file if it has not yet done so */ 3061 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3062 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3063 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3064 if (isbinary) { 3065 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3066 } else if (ishdf5) { 3067 #if defined(PETSC_HAVE_HDF5) 3068 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3069 #else 3070 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3071 #endif 3072 } else { 3073 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3074 } 3075 PetscFunctionReturn(0); 3076 } 3077 3078 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3079 { 3080 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3081 PetscInt *rowidxs,*colidxs; 3082 PetscScalar *matvals; 3083 PetscErrorCode ierr; 3084 3085 PetscFunctionBegin; 3086 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3087 3088 /* read in matrix header */ 3089 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3090 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3091 M = header[1]; N = header[2]; nz = header[3]; 3092 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3093 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 3094 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3095 3096 /* set block sizes from the viewer's .info file */ 3097 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3098 /* set global sizes if not set already */ 3099 if (mat->rmap->N < 0) mat->rmap->N = M; 3100 if (mat->cmap->N < 0) mat->cmap->N = N; 3101 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3102 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3103 3104 /* check if the matrix sizes are correct */ 3105 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3106 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3107 3108 /* read in row lengths and build row indices */ 3109 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3110 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3111 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3112 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3113 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3114 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3115 /* read in column indices and matrix values */ 3116 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3117 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3118 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3119 /* store matrix indices and values */ 3120 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3121 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3122 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3123 PetscFunctionReturn(0); 3124 } 3125 3126 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3127 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3128 { 3129 PetscErrorCode ierr; 3130 IS iscol_local; 3131 PetscBool isstride; 3132 PetscMPIInt lisstride=0,gisstride; 3133 3134 PetscFunctionBegin; 3135 /* check if we are grabbing all columns */ 3136 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3137 3138 if (isstride) { 3139 PetscInt start,len,mstart,mlen; 3140 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3141 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3142 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3143 if (mstart == start && mlen-mstart == len) lisstride = 1; 3144 } 3145 3146 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3147 if (gisstride) { 3148 PetscInt N; 3149 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3150 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3151 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3152 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3153 } else { 3154 PetscInt cbs; 3155 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3156 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3157 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3158 } 3159 3160 *isseq = iscol_local; 3161 PetscFunctionReturn(0); 3162 } 3163 3164 /* 3165 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and the global size of iscol_local 3166 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3167 3168 Input Parameters: 3169 mat - matrix 3170 isrow - parallel row index set; its local indices are a subset of the local rows of mat, 3171 i.e., mat->rstart <= isrow[i] < mat->rend 3172 iscol - parallel column index set; its local indices are a subset of the local columns of mat, 3173 i.e., mat->cstart <= iscol[i] < mat->cend 3174 Output Parameter: 3175 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3176 iscol_o - sequential column index set for retrieving mat->B 3177 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3178 */ 3179 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3180 { 3181 PetscErrorCode ierr; 3182 Vec x,cmap; 3183 const PetscInt *is_idx; 3184 PetscScalar *xarray,*cmaparray; 3185 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3186 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3187 Mat B=a->B; 3188 Vec lvec=a->lvec,lcmap; 3189 PetscInt i,cstart,cend,Bn=B->cmap->N; 3190 MPI_Comm comm; 3191 VecScatter Mvctx=a->Mvctx; 3192 3193 PetscFunctionBegin; 3194 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3195 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3196 3197 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3198 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3199 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3200 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3201 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3202 3203 /* Get start indices */ 3204 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3205 isstart -= ncols; 3206 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3207 3208 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3209 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3210 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3211 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3212 for (i=0; i<ncols; i++) { 3213 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3214 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3215 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3216 } 3217 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3218 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3219 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3220 3221 /* Get iscol_d */ 3222 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3223 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3224 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3225 3226 /* Get isrow_d */ 3227 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3228 rstart = mat->rmap->rstart; 3229 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3230 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3231 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3232 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3233 3234 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3235 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3236 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3237 3238 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3239 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3240 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3241 3242 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3243 3244 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3245 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3246 3247 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3248 /* off-process column indices */ 3249 count = 0; 3250 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3251 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3252 3253 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3254 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3255 for (i=0; i<Bn; i++) { 3256 if (PetscRealPart(xarray[i]) > -1.0) { 3257 idx[count] = i; /* local column index in off-diagonal part B */ 3258 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3259 count++; 3260 } 3261 } 3262 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3263 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3264 3265 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3266 /* cannot ensure iscol_o has same blocksize as iscol! 
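(iscol_o selects only the off-process columns of iscol that occur in B, so any block structure of iscol is generally lost)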
*/ 3267 3268 ierr = PetscFree(idx);CHKERRQ(ierr); 3269 *garray = cmap1; 3270 3271 ierr = VecDestroy(&x);CHKERRQ(ierr); 3272 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3273 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3274 PetscFunctionReturn(0); 3275 } 3276 3277 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3278 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3279 { 3280 PetscErrorCode ierr; 3281 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3282 Mat M = NULL; 3283 MPI_Comm comm; 3284 IS iscol_d,isrow_d,iscol_o; 3285 Mat Asub = NULL,Bsub = NULL; 3286 PetscInt n; 3287 3288 PetscFunctionBegin; 3289 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3290 3291 if (call == MAT_REUSE_MATRIX) { 3292 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3293 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3294 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3295 3296 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3297 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3298 3299 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3300 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3301 3302 /* Update diagonal and off-diagonal portions of submat */ 3303 asub = (Mat_MPIAIJ*)(*submat)->data; 3304 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3305 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3306 if (n) { 3307 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3308 } 3309 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3310 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3311 3312 } else { /* call == MAT_INITIAL_MATRIX) */ 3313 const PetscInt *garray; 3314 PetscInt BsubN; 3315 3316 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3317 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3318 3319 /* Create local submatrices Asub and Bsub */ 3320 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3321 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3322 3323 /* Create submatrix M */ 3324 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3325 3326 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3327 asub = (Mat_MPIAIJ*)M->data; 3328 3329 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3330 n = asub->B->cmap->N; 3331 if (BsubN > n) { 3332 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3333 const PetscInt *idx; 3334 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3335 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3336 3337 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3338 j = 0; 3339 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3340 for (i=0; i<n; i++) { 3341 if (j >= BsubN) break; 3342 while (subgarray[i] > garray[j]) j++; 3343 3344 if (subgarray[i] == garray[j]) { 3345 idx_new[i] = idx[j++]; 3346 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]); 3347 } 3348 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3349 3350 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3351 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3352 3353 } else if (BsubN < n) { 3354 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N); 3355 } 3356 3357 ierr = PetscFree(garray);CHKERRQ(ierr); 3358 *submat = M; 3359 3360 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3361 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3362 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3363 3364 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3365 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3366 3367 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3368 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3369 } 3370 PetscFunctionReturn(0); 3371 } 3372 3373 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3374 { 3375 PetscErrorCode ierr; 3376 IS iscol_local=NULL,isrow_d; 3377 PetscInt csize; 3378 PetscInt n,i,j,start,end; 3379 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3380 MPI_Comm comm; 3381 3382 PetscFunctionBegin; 3383 /* If isrow has same processor distribution as mat, 3384 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3385 if (call == MAT_REUSE_MATRIX) { 3386 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3387 if (isrow_d) { 3388 sameRowDist = PETSC_TRUE; 3389 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3390 } else { 3391 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3392 if (iscol_local) { 3393 sameRowDist = PETSC_TRUE; 3394 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3395 } 3396 } 3397 } else { 3398 /* Check if isrow has same processor distribution as mat */ 3399 sameDist[0] = 
PETSC_FALSE; 3400 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3401 if (!n) { 3402 sameDist[0] = PETSC_TRUE; 3403 } else { 3404 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3405 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3406 if (i >= start && j < end) { 3407 sameDist[0] = PETSC_TRUE; 3408 } 3409 } 3410 3411 /* Check if iscol has same processor distribution as mat */ 3412 sameDist[1] = PETSC_FALSE; 3413 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3414 if (!n) { 3415 sameDist[1] = PETSC_TRUE; 3416 } else { 3417 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3418 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3419 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3420 } 3421 3422 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3423 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3424 sameRowDist = tsameDist[0]; 3425 } 3426 3427 if (sameRowDist) { 3428 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3429 /* isrow and iscol have same processor distribution as mat */ 3430 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3431 PetscFunctionReturn(0); 3432 } else { /* sameRowDist */ 3433 /* isrow has same processor distribution as mat */ 3434 if (call == MAT_INITIAL_MATRIX) { 3435 PetscBool sorted; 3436 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3437 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3438 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3439 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3440 3441 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3442 if (sorted) { 3443 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3444 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3445 PetscFunctionReturn(0); 3446 } 3447 } else { /* call == MAT_REUSE_MATRIX */ 3448 IS iscol_sub; 3449 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3450 if (iscol_sub) { 3451 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3452 PetscFunctionReturn(0); 3453 } 3454 } 3455 } 3456 } 3457 3458 /* General case: iscol -> iscol_local which has global size of iscol */ 3459 if (call == MAT_REUSE_MATRIX) { 3460 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3461 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3462 } else { 3463 if (!iscol_local) { 3464 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3465 } 3466 } 3467 3468 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3469 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3470 3471 if (call == MAT_INITIAL_MATRIX) { 3472 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3473 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3474 } 3475 PetscFunctionReturn(0); 3476 } 3477 3478 /*@C 3479 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3480 and "off-diagonal" part of the matrix in CSR format. 3481 3482 Collective 3483 3484 Input Parameters: 3485 + comm - MPI communicator 3486 . 
A - "diagonal" portion of matrix 3487 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3488 - garray - global index of B columns 3489 3490 Output Parameter: 3491 . mat - the matrix, with input A as its local diagonal matrix 3492 Level: advanced 3493 3494 Notes: 3495 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3496 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3497 3498 .seealso: MatCreateMPIAIJWithSplitArrays() 3499 @*/ 3500 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3501 { 3502 PetscErrorCode ierr; 3503 Mat_MPIAIJ *maij; 3504 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3505 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3506 PetscScalar *oa=b->a; 3507 Mat Bnew; 3508 PetscInt m,n,N; 3509 3510 PetscFunctionBegin; 3511 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3512 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3513 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3514 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3515 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3516 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3517 3518 /* Get global columns of mat */ 3519 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3520 3521 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3522 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3523 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3524 maij = (Mat_MPIAIJ*)(*mat)->data; 3525 3526 (*mat)->preallocated = PETSC_TRUE; 3527 3528 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3529 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3530 3531 /* Set A as diagonal portion of *mat */ 3532 maij->A = A; 3533 3534 nz = oi[m]; 3535 for (i=0; i<nz; i++) { 3536 col = oj[i]; 3537 oj[i] = garray[col]; 3538 } 3539 3540 /* Set Bnew as off-diagonal portion of *mat */ 3541 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3542 bnew = (Mat_SeqAIJ*)Bnew->data; 3543 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3544 maij->B = Bnew; 3545 3546 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3547 3548 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3549 b->free_a = PETSC_FALSE; 3550 b->free_ij = PETSC_FALSE; 3551 ierr = MatDestroy(&B);CHKERRQ(ierr); 3552 3553 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3554 bnew->free_a = PETSC_TRUE; 3555 bnew->free_ij = PETSC_TRUE; 3556 3557 /* condense columns of maij->B */ 3558 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3559 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3560 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3561 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3562 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3563 PetscFunctionReturn(0); 3564 } 3565 3566 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3567 
3568 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3569 { 3570 PetscErrorCode ierr; 3571 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3572 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3573 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3574 Mat M,Msub,B=a->B; 3575 MatScalar *aa; 3576 Mat_SeqAIJ *aij; 3577 PetscInt *garray = a->garray,*colsub,Ncols; 3578 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3579 IS iscol_sub,iscmap; 3580 const PetscInt *is_idx,*cmap; 3581 PetscBool allcolumns=PETSC_FALSE; 3582 MPI_Comm comm; 3583 3584 PetscFunctionBegin; 3585 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3586 3587 if (call == MAT_REUSE_MATRIX) { 3588 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3589 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3590 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3591 3592 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3593 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3594 3595 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3596 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3597 3598 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3599 3600 } else { /* call == MAT_INITIAL_MATRIX) */ 3601 PetscBool flg; 3602 3603 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3604 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3605 3606 /* (1) iscol -> nonscalable iscol_local */ 3607 /* Check for special case: each processor gets entire matrix columns */ 3608 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3609 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3610 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3611 if (allcolumns) { 3612 iscol_sub = iscol_local; 3613 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3614 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3615 3616 } else { 3617 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3618 PetscInt *idx,*cmap1,k; 3619 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3620 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3621 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3622 count = 0; 3623 k = 0; 3624 for (i=0; i<Ncols; i++) { 3625 j = is_idx[i]; 3626 if (j >= cstart && j < cend) { 3627 /* diagonal part of mat */ 3628 idx[count] = j; 3629 cmap1[count++] = i; /* column index in submat */ 3630 } else if (Bn) { 3631 /* off-diagonal part of mat */ 3632 if (j == garray[k]) { 3633 idx[count] = j; 3634 cmap1[count++] = i; /* column index in submat */ 3635 } else if (j > garray[k]) { 3636 while (j > garray[k] && k < Bn-1) k++; 3637 if (j == garray[k]) { 3638 idx[count] = j; 3639 cmap1[count++] = i; /* column index in submat */ 3640 } 3641 } 3642 } 3643 } 3644 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3645 3646 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3647 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3648 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3649 3650 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3651 } 3652 3653 /* (3) Create sequential Msub */ 3654 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3655 } 3656 3657 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3658 aij = (Mat_SeqAIJ*)(Msub)->data; 3659 ii = aij->i; 3660 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3661 3662 /* 3663 m - number of local rows 3664 Ncols - number of columns (same on all processors) 3665 rstart - first row in new global matrix generated 3666 */ 3667 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3668 3669 if (call == MAT_INITIAL_MATRIX) { 3670 /* (4) Create parallel newmat */ 3671 PetscMPIInt rank,size; 3672 PetscInt csize; 3673 3674 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3675 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3676 3677 /* 3678 Determine the number of non-zeros in the diagonal and off-diagonal 3679 portions of the matrix in order to do correct preallocation 3680 */ 3681 3682 /* first get start and end of "diagonal" columns */ 3683 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3684 if (csize == PETSC_DECIDE) { 3685 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3686 if (mglobal == Ncols) { /* square matrix */ 3687 nlocal = m; 3688 } else { 3689 nlocal = Ncols/size + ((Ncols % size) > rank); 3690 } 3691 } else { 3692 nlocal = csize; 3693 } 3694 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3695 rstart = rend - nlocal; 3696 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3697 3698 /* next, compute all the lengths */ 3699 jj = aij->j; 3700 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3701 olens = dlens + m; 3702 for (i=0; i<m; i++) { 3703 jend = ii[i+1] - ii[i]; 3704 olen = 0; 3705 dlen = 0; 3706 for (j=0; j<jend; j++) { 3707 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3708 else dlen++; 3709 jj++; 3710 } 3711 olens[i] = olen; 3712 dlens[i] = dlen; 3713 } 3714 3715 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3716 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3717 3718 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3719 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3720 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3721 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3722 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3723 ierr = PetscFree(dlens);CHKERRQ(ierr); 3724 3725 } else { /* call == MAT_REUSE_MATRIX */ 3726 M = *newmat; 3727 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3728 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3729 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3730 /* 3731 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3732 rather than the slower MatSetValues(). 3733 */ 3734 M->was_assembled = PETSC_TRUE; 3735 M->assembled = PETSC_FALSE; 3736 } 3737 3738 /* (5) Set values of Msub to *newmat */ 3739 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3740 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3741 3742 jj = aij->j; 3743 aa = aij->a; 3744 for (i=0; i<m; i++) { 3745 row = rstart + i; 3746 nz = ii[i+1] - ii[i]; 3747 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3748 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3749 jj += nz; aa += nz; 3750 } 3751 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3752 3753 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3755 3756 ierr = PetscFree(colsub);CHKERRQ(ierr); 3757 3758 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3759 if (call == MAT_INITIAL_MATRIX) { 3760 *newmat = M; 3761 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3762 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3763 3764 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3765 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3766 3767 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3768 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3769 3770 if (iscol_local) { 3771 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3772 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3773 } 3774 } 3775 PetscFunctionReturn(0); 3776 } 3777 3778 /* 3779 Not great since it makes two copies of the submatrix: first a sequential SeqAIJ 3780 submatrix on each process, then the end result by concatenating the local pieces. 3781 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3782 3783 Note: This requires a sequential iscol with all indices. 
3784 */ 3785 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3786 { 3787 PetscErrorCode ierr; 3788 PetscMPIInt rank,size; 3789 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3790 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3791 Mat M,Mreuse; 3792 MatScalar *aa,*vwork; 3793 MPI_Comm comm; 3794 Mat_SeqAIJ *aij; 3795 PetscBool colflag,allcolumns=PETSC_FALSE; 3796 3797 PetscFunctionBegin; 3798 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3799 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3800 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3801 3802 /* Check for special case: each processor gets entire matrix columns */ 3803 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3804 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3805 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3806 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3807 3808 if (call == MAT_REUSE_MATRIX) { 3809 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3810 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3811 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3812 } else { 3813 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3814 } 3815 3816 /* 3817 m - number of local rows 3818 n - number of columns (same on all processors) 3819 rstart - first row in new global matrix generated 3820 */ 3821 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3822 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3823 if (call == MAT_INITIAL_MATRIX) { 3824 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3825 ii = aij->i; 3826 jj = aij->j; 3827 3828 /* 3829 Determine the number of non-zeros in the diagonal and off-diagonal 3830 portions of the matrix in order to do correct preallocation 3831 */ 3832 3833 /* first get start and end of "diagonal" columns */ 3834 if (csize == PETSC_DECIDE) { 3835 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3836 if (mglobal == n) { /* square matrix */ 3837 nlocal = m; 3838 } else { 3839 nlocal = n/size + ((n % size) > rank); 3840 } 3841 } else { 3842 nlocal = csize; 3843 } 3844 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3845 rstart = rend - nlocal; 3846 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3847 3848 /* next, compute all the lengths */ 3849 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3850 olens = dlens + m; 3851 for (i=0; i<m; i++) { 3852 jend = ii[i+1] - ii[i]; 3853 olen = 0; 3854 dlen = 0; 3855 for (j=0; j<jend; j++) { 3856 if (*jj < rstart || *jj >= rend) olen++; 3857 else dlen++; 3858 jj++; 3859 } 3860 olens[i] = olen; 3861 dlens[i] = dlen; 3862 } 3863 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3864 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3865 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3866 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3867 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3868 ierr = PetscFree(dlens);CHKERRQ(ierr); 3869 } else { 3870 PetscInt ml,nl; 3871 3872 M = *newmat; 3873 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3874 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3875 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3876 /* 3877 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3878 rather than the slower MatSetValues(). 3879 */ 3880 M->was_assembled = PETSC_TRUE; 3881 M->assembled = PETSC_FALSE; 3882 } 3883 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3884 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3885 ii = aij->i; 3886 jj = aij->j; 3887 aa = aij->a; 3888 for (i=0; i<m; i++) { 3889 row = rstart + i; 3890 nz = ii[i+1] - ii[i]; 3891 cwork = jj; jj += nz; 3892 vwork = aa; aa += nz; 3893 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3894 } 3895 3896 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3897 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3898 *newmat = M; 3899 3900 /* save submatrix used in processor for next request */ 3901 if (call == MAT_INITIAL_MATRIX) { 3902 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3903 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3904 } 3905 PetscFunctionReturn(0); 3906 } 3907 3908 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3909 { 3910 PetscInt m,cstart, cend,j,nnz,i,d; 3911 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3912 const PetscInt *JJ; 3913 PetscErrorCode ierr; 3914 PetscBool nooffprocentries; 3915 3916 PetscFunctionBegin; 3917 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3918 3919 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3920 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3921 m = B->rmap->n; 3922 cstart = B->cmap->rstart; 3923 cend = B->cmap->rend; 3924 rstart = B->rmap->rstart; 3925 3926 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3927 3928 if (PetscDefined(USE_DEBUG)) { 3929 for (i=0; i<m; i++) { 3930 nnz = Ii[i+1]- Ii[i]; 3931 JJ = J + Ii[i]; 3932 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3933 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3934 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3935 } 3936 } 3937 3938 for (i=0; i<m; i++) { 3939 nnz = Ii[i+1]- Ii[i]; 3940 JJ = J + Ii[i]; 3941 nnz_max = PetscMax(nnz_max,nnz); 3942 d = 0; 3943 for (j=0; j<nnz; j++) { 3944 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3945 } 3946 d_nnz[i] = d; 3947 o_nnz[i] = nnz - d; 3948 } 3949 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3950 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3951 3952 for (i=0; i<m; i++) { 3953 ii = i + rstart; 3954 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3955 } 3956 nooffprocentries = B->nooffprocentries; 3957 B->nooffprocentries = PETSC_TRUE; 3958 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3959 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3960 B->nooffprocentries = nooffprocentries; 3961 3962 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 /*@ 3967 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3968 (the default parallel PETSc format). 3969 3970 Collective 3971 3972 Input Parameters: 3973 + B - the matrix 3974 . i - the indices into j for the start of each local row (starts with zero) 3975 . j - the column indices for each local row (starts with zero) 3976 - v - optional values in the matrix 3977 3978 Level: developer 3979 3980 Notes: 3981 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3982 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3983 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3984 3985 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3986 3987 The format used for the sparse matrix input is equivalent to a 3988 row-major ordering, i.e., for the following matrix, the input data expected is 3989 as shown: 3990 3991 $ 1 0 0 3992 $ 2 0 3 P0 3993 $ ------- 3994 $ 4 5 6 P1 3995 $ 3996 $ Process0 [P0]: rows_owned=[0,1] 3997 $ i = {0,1,3} [size = nrow+1 = 2+1] 3998 $ j = {0,0,2} [size = 3] 3999 $ v = {1,2,3} [size = 3] 4000 $ 4001 $ Process1 [P1]: rows_owned=[2] 4002 $ i = {0,3} [size = nrow+1 = 1+1] 4003 $ j = {0,1,2} [size = 3] 4004 $ v = {4,5,6} [size = 3] 4005 4006 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4007 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4008 @*/ 4009 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4010 { 4011 PetscErrorCode ierr; 4012 4013 PetscFunctionBegin; 4014 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4015 PetscFunctionReturn(0); 4016 } 4017 4018 /*@C 4019 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4020 (the default parallel PETSc format). For good matrix assembly performance 4021 the user should preallocate the matrix storage by setting the parameters 4022 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4023 performance can be increased by more than a factor of 50. 4024 4025 Collective 4026 4027 Input Parameters: 4028 + B - the matrix 4029 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4030 (same value is used for all local rows) 4031 . d_nnz - array containing the number of nonzeros in the various rows of the 4032 DIAGONAL portion of the local submatrix (possibly different for each row) 4033 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4034 The size of this array is equal to the number of local rows, i.e 'm'. 4035 For matrices that will be factored, you must leave room for (and set) 4036 the diagonal entry even if it is zero. 4037 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4038 submatrix (same value is used for all local rows). 4039 - o_nnz - array containing the number of nonzeros in the various rows of the 4040 OFF-DIAGONAL portion of the local submatrix (possibly different for 4041 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4042 structure. The size of this array is equal to the number 4043 of local rows, i.e 'm'. 4044 4045 If the *_nnz parameter is given then the *_nz parameter is ignored. 4046 4047 The AIJ format (also called the Yale sparse matrix format or 4048 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4049 storage. The stored row and column indices begin with zero. 4050 See Users-Manual: ch_mat for details. 4051 4052 The parallel matrix is partitioned such that the first m0 rows belong to 4053 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4054 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4055 4056 The DIAGONAL portion of the local submatrix of a processor can be defined 4057 as the submatrix which is obtained by extracting the part corresponding to 4058 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4059 first row that belongs to the processor, r2 is the last row belonging to 4060 this processor, and c1-c2 is the range of indices of the local part of a 4061 vector suitable for applying the matrix to. This is an mxn matrix. In the 4062 common case of a square matrix, the row and column ranges are the same and 4063 the DIAGONAL part is also square. The remaining portion of the local 4064 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4065 4066 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4067 4068 You can call MatGetInfo() to get information on how effective the preallocation was; 4069 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4070 You can also run with the option -info and look for messages with the string 4071 malloc in them to see if additional memory allocation was needed. 4072 4073 Example usage: 4074 4075 Consider the following 8x8 matrix with 34 non-zero values that is 4076 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4077 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4078 as follows: 4079 4080 .vb 4081 1 2 0 | 0 3 0 | 0 4 4082 Proc0 0 5 6 | 7 0 0 | 8 0 4083 9 0 10 | 11 0 0 | 12 0 4084 ------------------------------------- 4085 13 0 14 | 15 16 17 | 0 0 4086 Proc1 0 18 0 | 19 20 21 | 0 0 4087 0 0 0 | 22 23 0 | 24 0 4088 ------------------------------------- 4089 Proc2 25 26 27 | 0 0 28 | 29 0 4090 30 0 0 | 31 32 33 | 0 34 4091 .ve 4092 4093 This can be represented as a collection of submatrices as: 4094 4095 .vb 4096 A B C 4097 D E F 4098 G H I 4099 .ve 4100 4101 Where the submatrices A,B,C are owned by proc0, D,E,F are 4102 owned by proc1, G,H,I are owned by proc2. 4103 4104 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4105 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4106 The 'M','N' parameters are 8,8, and have the same values on all procs. 4107 4108 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4109 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4110 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4111 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4112 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4113 matrix, and [DF] as another SeqAIJ matrix. 4114 4115 When d_nz, o_nz parameters are specified, d_nz storage elements are 4116 allocated for every row of the local diagonal submatrix, and o_nz 4117 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4118 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4119 row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4120 In this case, the values of d_nz,o_nz are: 4121 .vb 4122 proc0 : d_nz = 2, o_nz = 2 4123 proc1 : d_nz = 3, o_nz = 2 4124 proc2 : d_nz = 1, o_nz = 4 4125 .ve 4126 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4127 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4128 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4129 34 values. 4130 4131 When d_nnz, o_nnz parameters are specified, the storage is specified 4132 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4133 In the above case the values for d_nnz,o_nnz are: 4134 .vb 4135 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4136 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4137 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4138 .ve 4139 Here the space allocated is the sum of all the above values, i.e., 34, and 4140 hence pre-allocation is perfect. 4141 4142 Level: intermediate 4143 4144 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4145 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4146 @*/ 4147 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4148 { 4149 PetscErrorCode ierr; 4150 4151 PetscFunctionBegin; 4152 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4153 PetscValidType(B,1); 4154 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4155 PetscFunctionReturn(0); 4156 } 4157 4158 /*@ 4159 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4160 CSR format. 4161 4162 Collective 4163 4164 Input Parameters: 4165 + comm - MPI communicator 4166 . m - number of local rows (Cannot be PETSC_DECIDE) 4167 . n - This value should be the same as the local size used in creating the 4168 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4169 calculated if N is given) For square matrices n is almost always m. 4170 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4171 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4172 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4173 . j - column indices 4174 - a - matrix values 4175 4176 Output Parameter: 4177 . mat - the matrix 4178 4179 Level: intermediate 4180 4181 Notes: 4182 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4183 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4184 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4185 4186 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 4187 4188 The format used for the sparse matrix input is equivalent to a 4189 row-major ordering, 
i.e., for the following matrix, the input data expected is 4190 as shown. 4191 4192 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays(). 4193 4194 $ 1 0 0 4195 $ 2 0 3 P0 4196 $ ------- 4197 $ 4 5 6 P1 4198 $ 4199 $ Process0 [P0]: rows_owned=[0,1] 4200 $ i = {0,1,3} [size = nrow+1 = 2+1] 4201 $ j = {0,0,2} [size = 3] 4202 $ v = {1,2,3} [size = 3] 4203 $ 4204 $ Process1 [P1]: rows_owned=[2] 4205 $ i = {0,3} [size = nrow+1 = 1+1] 4206 $ j = {0,1,2} [size = 3] 4207 $ v = {4,5,6} [size = 3] 4208 4209 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4210 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4211 @*/ 4212 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4213 { 4214 PetscErrorCode ierr; 4215 4216 PetscFunctionBegin; 4217 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4218 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4219 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4220 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4221 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4222 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4223 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4224 PetscFunctionReturn(0); 4225 } 4226 4227 /*@ 4228 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4229 CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix 4230 4231 Collective 4232 4233 Input Parameters: 4234 + mat - the matrix 4235 . m - number of local rows (Cannot be PETSC_DECIDE) 4236 . n - This value should be the same as the local size used in creating the 4237 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4238 calculated if N is given) For square matrices n is almost always m. 4239 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4240 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4241 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4242 . J - column indices 4243 - v - matrix values 4244 4245 Level: intermediate 4246 4247 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4248 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4249 @*/ 4250 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4251 { 4252 PetscErrorCode ierr; 4253 PetscInt cstart,nnz,i,j; 4254 PetscInt *ld; 4255 PetscBool nooffprocentries; 4256 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4257 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4258 PetscScalar *ad = Ad->a, *ao = Ao->a; 4259 const PetscInt *Adi = Ad->i; 4260 PetscInt ldi,Iii,md; 4261 4262 PetscFunctionBegin; 4263 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0"); 4264 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4265 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4266 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4267 4268 cstart = mat->cmap->rstart; 4269 if (!Aij->ld) { 4270 /* count number of entries below the block diagonal */ 4271 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4272 Aij->ld = ld; 4273 for (i=0; i<m; i++) { 4274 nnz = Ii[i+1]- Ii[i]; 4275 j = 0; 4276 while (j < nnz && J[j] < cstart) {j++;} /* bound j before indexing J so we never read past the current row */ 4277 J += nnz; 4278 ld[i] = j; 4279 } 4280 } else { 4281 ld = Aij->ld; 4282 } 4283 4284 for (i=0; i<m; i++) { 4285 nnz = Ii[i+1]- Ii[i]; 4286 Iii = Ii[i]; 4287 ldi = ld[i]; 4288 md = Adi[i+1]-Adi[i]; 4289 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4290 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4291 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4292 ad += md; 4293 ao += nnz - md; 4294 } 4295 nooffprocentries = mat->nooffprocentries; 4296 mat->nooffprocentries = PETSC_TRUE; 4297 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4298 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4299 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4300 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4301 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4302 mat->nooffprocentries = nooffprocentries; 4303 PetscFunctionReturn(0); 4304 } 4305 4306 /*@C 4307 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4308 (the default parallel PETSc format). For good matrix assembly performance 4309 the user should preallocate the matrix storage by setting the parameters 4310 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4311 performance can be increased by more than a factor of 50. 4312 4313 Collective 4314 4315 Input Parameters: 4316 + comm - MPI communicator 4317 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4318 This value should be the same as the local size used in creating the 4319 y vector for the matrix-vector product y = Ax. 4320 . n - This value should be the same as the local size used in creating the 4321 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4322 calculated if N is given) For square matrices n is almost always m. 4323 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4324 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4325 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4326 (same value is used for all local rows) 4327 . d_nnz - array containing the number of nonzeros in the various rows of the 4328 DIAGONAL portion of the local submatrix (possibly different for each row) 4329 or NULL, if d_nz is used to specify the nonzero structure. 4330 The size of this array is equal to the number of local rows, i.e 'm'. 4331 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4332 submatrix (same value is used for all local rows). 4333 - o_nnz - array containing the number of nonzeros in the various rows of the 4334 OFF-DIAGONAL portion of the local submatrix (possibly different for 4335 each row) or NULL, if o_nz is used to specify the nonzero 4336 structure. The size of this array is equal to the number 4337 of local rows, i.e 'm'. 4338 4339 Output Parameter: 4340 . A - the matrix 4341 4342 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4343 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4344 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4345 4346 Notes: 4347 If the *_nnz parameter is given then the *_nz parameter is ignored 4348 4349 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4350 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4351 storage requirements for this matrix. 4352 4353 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4354 processor than it must be used on all processors that share the object for 4355 that argument. 4356 4357 The user MUST specify either the local or global matrix dimensions 4358 (possibly both). 4359 4360 The parallel matrix is partitioned across processors such that the 4361 first m0 rows belong to process 0, the next m1 rows belong to 4362 process 1, the next m2 rows belong to process 2 etc.. where 4363 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4364 values corresponding to [m x N] submatrix. 4365 4366 The columns are logically partitioned with the n0 columns belonging 4367 to 0th partition, the next n1 columns belonging to the next 4368 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4369 4370 The DIAGONAL portion of the local submatrix on any given processor 4371 is the submatrix corresponding to the rows and columns m,n 4372 corresponding to the given processor. i.e diagonal matrix on 4373 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4374 etc. The remaining portion of the local submatrix [m x (N-n)] 4375 constitute the OFF-DIAGONAL portion. The example below better 4376 illustrates this concept. 4377 4378 For a square global matrix we define each processor's diagonal portion 4379 to be its local rows and the corresponding columns (a square submatrix); 4380 each processor's off-diagonal portion encompasses the remainder of the 4381 local matrix (a rectangular submatrix). 4382 4383 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4384 4385 When calling this routine with a single process communicator, a matrix of 4386 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4387 type of communicator, use the construction mechanism 4388 .vb 4389 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4390 .ve 4391 4397 By default, this format uses inodes (identical nodes) when possible. 4398 We search for consecutive rows with the same nonzero structure, thereby 4399 reusing matrix information to achieve increased efficiency. 4400 4401 Options Database Keys: 4402 + -mat_no_inode - Do not use inodes 4403 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4404 4405 4406 4407 Example usage: 4408 4409 Consider the following 8x8 matrix with 34 non-zero values, that is 4410 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4411 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4412 as follows 4413 4414 .vb 4415 1 2 0 | 0 3 0 | 0 4 4416 Proc0 0 5 6 | 7 0 0 | 8 0 4417 9 0 10 | 11 0 0 | 12 0 4418 ------------------------------------- 4419 13 0 14 | 15 16 17 | 0 0 4420 Proc1 0 18 0 | 19 20 21 | 0 0 4421 0 0 0 | 22 23 0 | 24 0 4422 ------------------------------------- 4423 Proc2 25 26 27 | 0 0 28 | 29 0 4424 30 0 0 | 31 32 33 | 0 34 4425 .ve 4426 4427 This can be represented as a collection of submatrices as 4428 4429 .vb 4430 A B C 4431 D E F 4432 G H I 4433 .ve 4434 4435 where the submatrices A,B,C are owned by proc0, D,E,F are 4436 owned by proc1, G,H,I are owned by proc2. 4437 4438 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4439 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4440 The 'M','N' parameters are 8,8, and have the same values on all procs. 4441 4442 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4443 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4444 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4445 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4446 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 4447 matrix, and [DF] as another SeqAIJ matrix. 4448 4449 When d_nz, o_nz parameters are specified, d_nz storage elements are 4450 allocated for every row of the local diagonal submatrix, and o_nz 4451 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4452 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4453 row in the local DIAGONAL and OFF-DIAGONAL submatrices. 4454 In this case, the values of d_nz,o_nz are 4455 .vb 4456 proc0 : d_nz = 2, o_nz = 2 4457 proc1 : d_nz = 3, o_nz = 2 4458 proc2 : d_nz = 1, o_nz = 4 4459 .ve 4460 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4461 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4462 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4463 34 values. 4464 4465 When d_nnz, o_nnz parameters are specified, the storage is specified 4466 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4467 In the above case the values for d_nnz,o_nnz are 4468 .vb 4469 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4470 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4471 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4472 .ve 4473 Here the space allocated is the sum of all the above values, i.e., 34, and 4474 hence the preallocation is exact.
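As a concrete sketch (not part of the original example; error checking and the MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() calls that fill the matrix are omitted, and comm is assumed to be the 3-process communicator above), proc0 could create its share of the 8x8 matrix with .vb Mat A; PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2}; /* per-row counts for proc0 from the table above */ MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A); /* d_nz,o_nz (0 here) are ignored because d_nnz,o_nnz are given */ .ve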
4475 4476 Level: intermediate 4477 4478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4479 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4480 @*/ 4481 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4482 { 4483 PetscErrorCode ierr; 4484 PetscMPIInt size; 4485 4486 PetscFunctionBegin; 4487 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4488 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4489 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4490 if (size > 1) { 4491 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4492 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4493 } else { 4494 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4495 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4496 } 4497 PetscFunctionReturn(0); 4498 } 4499 4500 /*@C 4501 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4502 4503 Not collective 4504 4505 Input Parameter: 4506 . A - The MPIAIJ matrix 4507 4508 Output Parameters: 4509 + Ad - The local diagonal block as a SeqAIJ matrix 4510 . Ao - The local off-diagonal block as a SeqAIJ matrix 4511 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4512 4513 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4514 in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4515 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4516 local column numbers to global column numbers in the original matrix.
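For example, a minimal sketch (error checking omitted; j is a hypothetical local column index of Ao) that fetches the blocks and translates a local column of Ao to the corresponding global column is .vb Mat Ad,Ao; const PetscInt *colmap; PetscInt jglobal; MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap); jglobal = colmap[j]; /* global column of local column j of Ao */ .ve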
4517 4518 Level: intermediate 4519 4520 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4521 @*/ 4522 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4523 { 4524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4525 PetscBool flg; 4526 PetscErrorCode ierr; 4527 4528 PetscFunctionBegin; 4529 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4530 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4531 if (Ad) *Ad = a->A; 4532 if (Ao) *Ao = a->B; 4533 if (colmap) *colmap = a->garray; 4534 PetscFunctionReturn(0); 4535 } 4536 4537 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4538 { 4539 PetscErrorCode ierr; 4540 PetscInt m,N,i,rstart,nnz,Ii; 4541 PetscInt *indx; 4542 PetscScalar *values; 4543 4544 PetscFunctionBegin; 4545 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4546 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4547 PetscInt *dnz,*onz,sum,bs,cbs; 4548 4549 if (n == PETSC_DECIDE) { 4550 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4551 } 4552 /* Check sum(n) = N */ 4553 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4554 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4555 4556 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4557 rstart -= m; 4558 4559 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4560 for (i=0; i<m; i++) { 4561 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4562 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4563 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4564 } 4565 4566 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4567 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4568 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4569 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4570 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4571 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4572 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4573 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4574 } 4575 4576 /* numeric phase */ 4577 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4578 for (i=0; i<m; i++) { 4579 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4580 Ii = i + rstart; 4581 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4582 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4583 } 4584 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4585 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4590 { 4591 PetscErrorCode ierr; 4592 PetscMPIInt rank; 4593 PetscInt m,N,i,rstart,nnz; 4594 size_t len; 4595 const PetscInt *indx; 4596 PetscViewer out; 4597 char *name; 4598 Mat B; 4599 const PetscScalar *values; 4600 4601 PetscFunctionBegin; 4602 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4603 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4604 /* Should this be the type of the diagonal block of A? 
*/ 4605 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4606 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4607 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4608 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4609 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4610 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4611 for (i=0; i<m; i++) { 4612 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4613 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4614 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4615 } 4616 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4617 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4618 4619 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4620 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4621 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4622 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4623 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4624 ierr = PetscFree(name);CHKERRQ(ierr); 4625 ierr = MatView(B,out);CHKERRQ(ierr); 4626 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4627 ierr = MatDestroy(&B);CHKERRQ(ierr); 4628 PetscFunctionReturn(0); 4629 } 4630 4631 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4632 { 4633 PetscErrorCode ierr; 4634 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4635 4636 PetscFunctionBegin; 4637 if (!merge) PetscFunctionReturn(0); 4638 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4639 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4640 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4641 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4642 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4643 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4644 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4645 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4646 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4647 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4648 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4649 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4650 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4651 ierr = PetscFree(merge);CHKERRQ(ierr); 4652 PetscFunctionReturn(0); 4653 } 4654 4655 #include <../src/mat/utils/freespace.h> 4656 #include <petscbt.h> 4657 4658 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4659 { 4660 PetscErrorCode ierr; 4661 MPI_Comm comm; 4662 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4663 PetscMPIInt size,rank,taga,*len_s; 4664 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4665 PetscInt proc,m; 4666 PetscInt **buf_ri,**buf_rj; 4667 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4668 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4669 MPI_Request *s_waits,*r_waits; 4670 MPI_Status *status; 4671 MatScalar *aa=a->a; 4672 MatScalar **abuf_r,*ba_i; 4673 Mat_Merge_SeqsToMPI *merge; 4674 PetscContainer container; 4675 4676 PetscFunctionBegin; 4677 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4678 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4679 4680 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4681 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4682 4683 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4684 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4685 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4686 4687 bi = merge->bi; 4688 bj = merge->bj; 4689 buf_ri = merge->buf_ri; 4690 buf_rj = merge->buf_rj; 4691 4692 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4693 owners = merge->rowmap->range; 4694 len_s = merge->len_s; 4695 4696 /* send and recv matrix values */ 4697 /*-----------------------------*/ 4698 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4699 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4700 4701 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4702 for (proc=0,k=0; proc<size; proc++) { 4703 if (!len_s[proc]) continue; 4704 i = owners[proc]; 4705 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4706 k++; 4707 } 4708 4709 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4710 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4711 ierr = PetscFree(status);CHKERRQ(ierr); 4712 4713 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4714 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4715 4716 /* insert mat values of mpimat */ 4717 /*----------------------------*/ 4718 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4719 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4720 4721 for (k=0; k<merge->nrecv; k++) { 4722 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4723 nrows = *(buf_ri_k[k]); 4724 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4725 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4726 } 4727 4728 /* set values of ba */ 4729 m = merge->rowmap->n; 4730 for (i=0; i<m; i++) { 4731 arow = owners[rank] + i; 4732 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4733 bnzi = bi[i+1] - bi[i]; 4734 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4735 4736 /* add local non-zero vals of this proc's seqmat into ba */ 4737 anzi = ai[arow+1] - ai[arow]; 4738 aj = a->j + ai[arow]; 4739 aa = a->a + ai[arow]; 4740 nextaj = 0; 4741 for (j=0; nextaj<anzi; j++) { 4742 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4743 ba_i[j] += aa[nextaj++]; 4744 } 4745 } 4746 4747 /* add received vals into ba */ 4748 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4749 /* i-th row */ 4750 if (i == *nextrow[k]) { 4751 anzi = *(nextai[k]+1) - *nextai[k]; 4752 aj = buf_rj[k] + *(nextai[k]); 4753 aa = abuf_r[k] + *(nextai[k]); 4754 nextaj = 0; 4755 for (j=0; nextaj<anzi; j++) { 4756 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4757 ba_i[j] += aa[nextaj++]; 4758 } 4759 } 4760 nextrow[k]++; nextai[k]++; 4761 } 4762 } 4763 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4764 } 4765 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4766 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4767 4768 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4769 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4770 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4771 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4772 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4773 PetscFunctionReturn(0); 4774 } 4775 4776 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4777 { 4778 PetscErrorCode ierr; 4779 Mat B_mpi; 4780 Mat_SeqAIJ
*a=(Mat_SeqAIJ*)seqmat->data; 4781 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4782 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4783 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4784 PetscInt len,proc,*dnz,*onz,bs,cbs; 4785 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4786 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4787 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4788 MPI_Status *status; 4789 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4790 PetscBT lnkbt; 4791 Mat_Merge_SeqsToMPI *merge; 4792 PetscContainer container; 4793 4794 PetscFunctionBegin; 4795 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4796 4797 /* make sure it is a PETSc comm */ 4798 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4799 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4800 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4801 4802 ierr = PetscNew(&merge);CHKERRQ(ierr); 4803 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4804 4805 /* determine row ownership */ 4806 /*---------------------------------------------------------*/ 4807 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4808 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4809 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4810 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4811 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4812 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4813 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4814 4815 m = merge->rowmap->n; 4816 owners = merge->rowmap->range; 4817 4818 /* determine the number of messages to send, their lengths */ 4819 /*---------------------------------------------------------*/ 4820 len_s = merge->len_s; 4821 4822 len = 0; /* length of buf_si[] */ 4823 merge->nsend = 0; 4824 for (proc=0; proc<size; proc++) { 4825 len_si[proc] = 0; 4826 if (proc == rank) { 4827 len_s[proc] = 0; 4828 } else { 4829 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4830 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4831 } 4832 if (len_s[proc]) { 4833 merge->nsend++; 4834 nrows = 0; 4835 for (i=owners[proc]; i<owners[proc+1]; i++) { 4836 if (ai[i+1] > ai[i]) nrows++; 4837 } 4838 len_si[proc] = 2*(nrows+1); 4839 len += len_si[proc]; 4840 } 4841 } 4842 4843 /* determine the number and length of messages to receive for ij-structure */ 4844 /*-------------------------------------------------------------------------*/ 4845 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4846 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4847 4848 /* post the Irecv of j-structure */ 4849 /*-------------------------------*/ 4850 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4851 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4852 4853 /* post the Isend of j-structure */ 4854 /*--------------------------------*/ 4855 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4856 4857 for (proc=0, k=0; proc<size; proc++) { 4858 if (!len_s[proc]) continue; 4859 i = owners[proc]; 4860 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4861 k++; 4862 } 4863 4864 /* receives and sends of j-structure are complete */ 4865 /*------------------------------------------------*/ 
4866 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4867 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4868 4869 /* send and recv i-structure */ 4870 /*---------------------------*/ 4871 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4872 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4873 4874 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4875 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4876 for (proc=0,k=0; proc<size; proc++) { 4877 if (!len_s[proc]) continue; 4878 /* form outgoing message for i-structure: 4879 buf_si[0]: nrows to be sent 4880 [1:nrows]: row index (global) 4881 [nrows+1:2*nrows+1]: i-structure index 4882 */ 4883 /*-------------------------------------------*/ 4884 nrows = len_si[proc]/2 - 1; 4885 buf_si_i = buf_si + nrows+1; 4886 buf_si[0] = nrows; 4887 buf_si_i[0] = 0; 4888 nrows = 0; 4889 for (i=owners[proc]; i<owners[proc+1]; i++) { 4890 anzi = ai[i+1] - ai[i]; 4891 if (anzi) { 4892 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4893 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4894 nrows++; 4895 } 4896 } 4897 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4898 k++; 4899 buf_si += len_si[proc]; 4900 } 4901 4902 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4903 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4904 4905 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4906 for (i=0; i<merge->nrecv; i++) { 4907 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4908 } 4909 4910 ierr = PetscFree(len_si);CHKERRQ(ierr); 4911 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4912 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4913 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4914 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4915 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4916 ierr = PetscFree(status);CHKERRQ(ierr); 4917 4918 /* compute a local seq matrix in each processor */ 4919 /*----------------------------------------------*/ 4920 /* allocate bi array and free space for accumulating nonzero column info */ 4921 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4922 bi[0] = 0; 4923 4924 /* create and initialize a linked list */ 4925 nlnk = N+1; 4926 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4927 4928 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4929 len = ai[owners[rank+1]] - ai[owners[rank]]; 4930 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4931 4932 current_space = free_space; 4933 4934 /* determine symbolic info for each local row */ 4935 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4936 4937 for (k=0; k<merge->nrecv; k++) { 4938 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4939 nrows = *buf_ri_k[k]; 4940 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4941 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4942 } 4943 4944 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4945 len = 0; 4946 for (i=0; i<m; i++) { 4947 bnzi = 0; 4948 /* add local non-zero cols of this proc's seqmat into lnk */ 4949 arow = owners[rank] + i; 4950 anzi =
ai[arow+1] - ai[arow]; 4951 aj = a->j + ai[arow]; 4952 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4953 bnzi += nlnk; 4954 /* add received col data into lnk */ 4955 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4956 if (i == *nextrow[k]) { /* i-th row */ 4957 anzi = *(nextai[k]+1) - *nextai[k]; 4958 aj = buf_rj[k] + *nextai[k]; 4959 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4960 bnzi += nlnk; 4961 nextrow[k]++; nextai[k]++; 4962 } 4963 } 4964 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4965 4966 /* if free space is not available, make more free space */ 4967 if (current_space->local_remaining<bnzi) { 4968 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4969 nspacedouble++; 4970 } 4971 /* copy data into free space, then initialize lnk */ 4972 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4973 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4974 4975 current_space->array += bnzi; 4976 current_space->local_used += bnzi; 4977 current_space->local_remaining -= bnzi; 4978 4979 bi[i+1] = bi[i] + bnzi; 4980 } 4981 4982 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4983 4984 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4985 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4986 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4987 4988 /* create symbolic parallel matrix B_mpi */ 4989 /*---------------------------------------*/ 4990 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4991 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4992 if (n==PETSC_DECIDE) { 4993 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4994 } else { 4995 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4996 } 4997 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4998 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4999 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5000 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5001 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5002 5003 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5004 B_mpi->assembled = PETSC_FALSE; 5005 merge->bi = bi; 5006 merge->bj = bj; 5007 merge->buf_ri = buf_ri; 5008 merge->buf_rj = buf_rj; 5009 merge->coi = NULL; 5010 merge->coj = NULL; 5011 merge->owners_co = NULL; 5012 5013 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5014 5015 /* attach the supporting struct to B_mpi for reuse */ 5016 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5017 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5018 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 5019 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5020 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5021 *mpimat = B_mpi; 5022 5023 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5024 PetscFunctionReturn(0); 5025 } 5026 5027 /*@C 5028 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5029 matrices from each processor 5030 5031 Collective 5032 5033 Input Parameters: 5034 + comm - the communicator the parallel matrix will live on 5035 . seqmat - the input sequential matrix 5036 . m - number of local rows (or PETSC_DECIDE) 5037 .
n - number of local columns (or PETSC_DECIDE) 5038 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5039 5040 Output Parameter: 5041 . mpimat - the parallel matrix generated 5042 5043 Level: advanced 5044 5045 Notes: 5046 The dimensions of the sequential matrix in each processor MUST be the same. 5047 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5048 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5049 @*/ 5050 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5051 { 5052 PetscErrorCode ierr; 5053 PetscMPIInt size; 5054 5055 PetscFunctionBegin; 5056 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5057 if (size == 1) { 5058 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5059 if (scall == MAT_INITIAL_MATRIX) { 5060 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5061 } else { 5062 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5063 } 5064 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5065 PetscFunctionReturn(0); 5066 } 5067 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5068 if (scall == MAT_INITIAL_MATRIX) { 5069 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5070 } 5071 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5072 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5073 PetscFunctionReturn(0); 5074 } 5075 5076 /*@ 5077 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5078 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5079 with MatGetSize() 5080 5081 Not Collective 5082 5083 Input Parameters: 5084 + A - the matrix 5085 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5086 5087 Output Parameter: 5088 . A_loc - the local sequential matrix generated 5089 5090 Level: developer 5091 5092 Notes: 5093 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5094 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5095 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5096 modify the values of the returned A_loc.
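A typical call sequence, sketched here without error checking, is .vb Mat A_loc; MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc); /* gather the local rows into a SeqAIJ matrix */ /* ... use A_loc ... */ MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc); /* refresh the values after A has changed */ MatDestroy(&A_loc); .ve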
5097 5098 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5099 5100 @*/ 5101 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5102 { 5103 PetscErrorCode ierr; 5104 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5105 Mat_SeqAIJ *mat,*a,*b; 5106 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5107 MatScalar *aa,*ba,*cam; 5108 PetscScalar *ca; 5109 PetscMPIInt size; 5110 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5111 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5112 PetscBool match; 5113 5114 PetscFunctionBegin; 5115 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5116 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5117 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5118 if (size == 1) { 5119 if (scall == MAT_INITIAL_MATRIX) { 5120 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5121 *A_loc = mpimat->A; 5122 } else if (scall == MAT_REUSE_MATRIX) { 5123 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5124 } 5125 PetscFunctionReturn(0); 5126 } 5127 5128 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5129 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5130 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5131 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5132 aa = a->a; ba = b->a; 5133 if (scall == MAT_INITIAL_MATRIX) { 5134 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5135 ci[0] = 0; 5136 for (i=0; i<am; i++) { 5137 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5138 } 5139 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5140 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5141 k = 0; 5142 for (i=0; i<am; i++) { 5143 ncols_o = bi[i+1] - bi[i]; 5144 ncols_d = ai[i+1] - ai[i]; 5145 /* off-diagonal portion of A */ 5146 for (jo=0; jo<ncols_o; jo++) { 5147 col = cmap[*bj]; 5148 if (col >= cstart) break; 5149 cj[k] = col; bj++; 5150 ca[k++] = *ba++; 5151 } 5152 /* diagonal portion of A */ 5153 for (j=0; j<ncols_d; j++) { 5154 cj[k] = cstart + *aj++; 5155 ca[k++] = *aa++; 5156 } 5157 /* off-diagonal portion of A */ 5158 for (j=jo; j<ncols_o; j++) { 5159 cj[k] = cmap[*bj++]; 5160 ca[k++] = *ba++; 5161 } 5162 } 5163 /* put together the new matrix */ 5164 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5165 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5166 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5167 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5168 mat->free_a = PETSC_TRUE; 5169 mat->free_ij = PETSC_TRUE; 5170 mat->nonew = 0; 5171 } else if (scall == MAT_REUSE_MATRIX) { 5172 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5173 ci = mat->i; cj = mat->j; cam = mat->a; 5174 for (i=0; i<am; i++) { 5175 /* off-diagonal portion of A */ 5176 ncols_o = bi[i+1] - bi[i]; 5177 for (jo=0; jo<ncols_o; jo++) { 5178 col = cmap[*bj]; 5179 if (col >= cstart) break; 5180 *cam++ = *ba++; bj++; 5181 } 5182 /* diagonal portion of A */ 5183 ncols_d = ai[i+1] - ai[i]; 5184 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5185 /* off-diagonal portion of A */ 5186 for (j=jo; j<ncols_o; j++) { 5187 *cam++ = *ba++; bj++; 5188 } 5189 } 5190 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5191 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5192 PetscFunctionReturn(0); 5193 } 5194 5195 /*@C 5196 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5197 5198 Not Collective 5199 5200 Input Parameters: 5201 + A - the matrix 5202 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5203 - row, col - index sets of rows and columns to extract (or NULL) 5204 5205 Output Parameter: 5206 . A_loc - the local sequential matrix generated 5207 5208 Level: developer 5209 5210 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5211 5212 @*/ 5213 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5214 { 5215 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5216 PetscErrorCode ierr; 5217 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5218 IS isrowa,iscola; 5219 Mat *aloc; 5220 PetscBool match; 5221 5222 PetscFunctionBegin; 5223 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5224 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5225 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5226 if (!row) { 5227 start = A->rmap->rstart; end = A->rmap->rend; 5228 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5229 } else { 5230 isrowa = *row; 5231 } 5232 if (!col) { 5233 start = A->cmap->rstart; 5234 cmap = a->garray; 5235 nzA = a->A->cmap->n; 5236 nzB = a->B->cmap->n; 5237 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5238 ncols = 0; 5239 for (i=0; i<nzB; i++) { 5240 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5241 else break; 5242 } 5243 imark = i; 5244 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5245 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5246 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5247 } else { 5248 iscola = *col; 5249 } 5250 if (scall != MAT_INITIAL_MATRIX) { 5251 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5252 aloc[0] = *A_loc; 5253 } 5254 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5255 if (!col) { /* attach global id of condensed columns */ 5256 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5257 } 5258 *A_loc = aloc[0]; 5259 ierr = PetscFree(aloc);CHKERRQ(ierr); 5260 if (!row) { 5261 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5262 } 5263 if (!col) { 5264 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5265 } 5266 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5267 PetscFunctionReturn(0); 5268 } 5269 5270 /* 5271 * Create a sequential AIJ matrix 
based on row indices. A whole row is extracted once its row index is matched. 5272 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based 5273 * on a global size. 5274 * */ 5275 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5276 { 5277 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5278 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5279 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5280 PetscMPIInt owner; 5281 PetscSFNode *iremote,*oiremote; 5282 const PetscInt *lrowindices; 5283 PetscErrorCode ierr; 5284 PetscSF sf,osf; 5285 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5286 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5287 MPI_Comm comm; 5288 ISLocalToGlobalMapping mapping; 5289 5290 PetscFunctionBegin; 5291 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5292 /* plocalsize is the number of roots 5293 * nrows is the number of leaves 5294 * */ 5295 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5296 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5297 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5298 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5299 for (i=0;i<nrows;i++) { 5300 /* Find a remote index and an owner for a row 5301 * The row could be local or remote 5302 * */ 5303 owner = 0; 5304 lidx = 0; 5305 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5306 iremote[i].index = lidx; 5307 iremote[i].rank = owner; 5308 } 5309 /* Create SF to communicate how many nonzero columns for each row */ 5310 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5311 /* SF will figure out the number of nonzero columns for each row, and their 5312 * offsets 5313 * */ 5314 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5315 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5316 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5317 5318 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5319 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5320 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5321 roffsets[0] = 0; 5322 roffsets[1] = 0; 5323 for (i=0;i<plocalsize;i++) { 5324 /* diag */ 5325 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5326 /* off diag */ 5327 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5328 /* compute offsets so that we know the relative location of each row */ 5329 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5330 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5331 } 5332 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5333 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5334 /* 'r' means root, and 'l' means leaf */ 5335 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5336 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5337 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5338 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5339 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5340 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5341 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5342 dntotalcols = 0; 5343 ontotalcols = 0; 5344 ncol = 0; 5345 for (i=0;i<nrows;i++) { 5346 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5347 ncol = PetscMax(pnnz[i],ncol); 5348 /* diag */ 5349 dntotalcols += nlcols[i*2+0]; 5350 /* off diag */ 5351 ontotalcols += nlcols[i*2+1]; 5352 } 5353 /* We do not need to figure out the right number of columns 5354 * since all the
calculations will be done by going through the raw data 5355 * */ 5356 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5357 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5358 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5359 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5360 /* diag */ 5361 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5362 /* off diag */ 5363 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5364 /* diag */ 5365 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5366 /* off diag */ 5367 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5368 dntotalcols = 0; 5369 ontotalcols = 0; 5370 ntotalcols = 0; 5371 for (i=0;i<nrows;i++) { 5372 owner = 0; 5373 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5374 /* Set iremote for diag matrix */ 5375 for (j=0;j<nlcols[i*2+0];j++) { 5376 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5377 iremote[dntotalcols].rank = owner; 5378 /* P_oth is seqAIJ so ilocal needs to point to the first part of memory */ 5379 ilocal[dntotalcols++] = ntotalcols++; 5380 } 5381 /* off diag */ 5382 for (j=0;j<nlcols[i*2+1];j++) { 5383 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5384 oiremote[ontotalcols].rank = owner; 5385 oilocal[ontotalcols++] = ntotalcols++; 5386 } 5387 } 5388 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5389 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5390 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5391 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5392 /* P serves as roots and P_oth is leaves 5393 * Diag matrix 5394 * */ 5395 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5396 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5397 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5398 5399 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5400 /* Off diag */ 5401 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5402 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5403 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5404 /* We operate on the matrix internal data for saving memory */ 5405 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5406 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5407 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5408 /* Convert to global indices for diag matrix */ 5409 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5410 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5411 /* We want P_oth to store global indices */ 5412 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5413 /* Use memory scalable approach */ 5414 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5415 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5416 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5417 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5418 /* Convert back to local indices */ 5419 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5420 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5421 nout = 0; 5422 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5423 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D
\n",po->i[plocalsize],nout); 5424 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5425 /* Exchange values */ 5426 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5427 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5428 /* Stop PETSc from shrinking memory */ 5429 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5430 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5431 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5432 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5433 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5434 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5435 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5436 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5437 PetscFunctionReturn(0); 5438 } 5439 5440 /* 5441 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5442 * This supports MPIAIJ and MAIJ 5443 * */ 5444 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5445 { 5446 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5447 Mat_SeqAIJ *p_oth; 5448 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5449 IS rows,map; 5450 PetscHMapI hamp; 5451 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5452 MPI_Comm comm; 5453 PetscSF sf,osf; 5454 PetscBool has; 5455 PetscErrorCode ierr; 5456 5457 PetscFunctionBegin; 5458 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5459 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5460 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5461 * and then create a submatrix (that often is an overlapping matrix) 5462 * */ 5463 if (reuse == MAT_INITIAL_MATRIX) { 5464 /* Use a hash table to figure out unique keys */ 5465 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5466 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5467 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5468 count = 0; 5469 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5470 for (i=0;i<a->B->cmap->n;i++) { 5471 key = a->garray[i]/dof; 5472 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5473 if (!has) { 5474 mapping[i] = count; 5475 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5476 } else { 5477 /* Current 'i' has the same value the previous step */ 5478 mapping[i] = count-1; 5479 } 5480 } 5481 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5482 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5483 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5484 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5485 off = 0; 5486 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5487 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5488 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5489 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5490 /* In case, the matrix was already created but users want to recreate the matrix */ 5491 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5492 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5493 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5494 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5495 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5496 } else if (reuse == MAT_REUSE_MATRIX) { 5497 /* If the matrix was already created, we simply update values using the SF objects 5498 * that were attached to the matrix earlier. 5499 * */ 5500 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5501 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5502 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5503 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5504 /* Update values in place */ 5505 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5506 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5507 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5508 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5509 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5510 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5511 PetscFunctionReturn(0); 5512 } 5513 5514 /*@C 5515 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns of local A 5516 5517 Collective on Mat 5518 5519 Input Parameters: 5520 + A,B - the matrices in mpiaij format 5521 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5522 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5523 5524 Output Parameters: 5525 + rowb, colb - index sets of rows and columns of B to extract 5526 - B_seq - the sequential matrix generated 5527 5528 Level: developer 5529 5530 @*/ 5531 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5532 { 5533 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5534 PetscErrorCode ierr; 5535 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5536 IS isrowb,iscolb; 5537 Mat *bseq=NULL; 5538 5539 PetscFunctionBegin; 5540 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5541 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5542 } 5543 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5544 5545 if (scall == MAT_INITIAL_MATRIX) { 5546 start = A->cmap->rstart; 5547 cmap = a->garray; 5548 nzA = a->A->cmap->n; 5549 nzB = a->B->cmap->n; 5550 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5551 ncols = 0; 5552 for (i=0; i<nzB; i++) { /* row < local row index */ 5553 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5554 else break; 5555 } 5556 imark = i; 5557 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5558 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5559 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5560 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5561 } else { 5562 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5563 isrowb = *rowb; iscolb = *colb; 5564 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5565 bseq[0] = *B_seq; 5566 } 5567 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5568 *B_seq = bseq[0]; 5569 ierr = PetscFree(bseq);CHKERRQ(ierr); 5570 if (!rowb) { 5571 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5572 } else { 5573 *rowb = isrowb; 5574 } 5575 if (!colb) { 5576 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5577 }
else { 5578 *colb = iscolb; 5579 } 5580 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5581 PetscFunctionReturn(0); 5582 } 5583 5584 /* 5585 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns 5586 of the OFF-DIAGONAL portion of local A 5587 5588 Collective on Mat 5589 5590 Input Parameters: 5591 + A,B - the matrices in mpiaij format 5592 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5593 5594 Output Parameters: 5595 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5596 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5597 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5598 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5599 5600 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5601 for this matrix. This is not desirable. 5602 5603 Level: developer 5604 5605 */ 5606 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5607 { 5608 PetscErrorCode ierr; 5609 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5610 Mat_SeqAIJ *b_oth; 5611 VecScatter ctx; 5612 MPI_Comm comm; 5613 const PetscMPIInt *rprocs,*sprocs; 5614 const PetscInt *srow,*rstarts,*sstarts; 5615 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5616 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5617 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5618 MPI_Request *rwaits = NULL,*swaits = NULL; 5619 MPI_Status rstatus; 5620 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5621 5622 PetscFunctionBegin; 5623 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5624 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5625 5626 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5627 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5628 } 5629 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5630 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5631 5632 if (size == 1) { 5633 startsj_s = NULL; 5634 bufa_ptr = NULL; 5635 *B_oth = NULL; 5636 PetscFunctionReturn(0); 5637 } 5638 5639 ctx = a->Mvctx; 5640 tag = ((PetscObject)ctx)->tag; 5641 5642 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5643 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5644 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5645 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5646 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5647 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5648 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5649 5650 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5651 if (scall == MAT_INITIAL_MATRIX) { 5652 /* i-array */ 5653 /*---------*/ 5654 /* post receives */ 5655 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be
NULL when nrecvs=0 */ 5656 for (i=0; i<nrecvs; i++) { 5657 rowlen = rvalues + rstarts[i]*rbs; 5658 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5659 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5660 } 5661 5662 /* pack the outgoing message */ 5663 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5664 5665 sstartsj[0] = 0; 5666 rstartsj[0] = 0; 5667 len = 0; /* total length of j or a array to be sent */ 5668 if (nsends) { 5669 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5670 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5671 } 5672 for (i=0; i<nsends; i++) { 5673 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5674 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5675 for (j=0; j<nrows; j++) { 5676 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5677 for (l=0; l<sbs; l++) { 5678 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5679 5680 rowlen[j*sbs+l] = ncols; 5681 5682 len += ncols; 5683 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5684 } 5685 k++; 5686 } 5687 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5688 5689 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5690 } 5691 /* recvs and sends of i-array are completed */ 5692 i = nrecvs; 5693 while (i--) { 5694 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5695 } 5696 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5697 ierr = PetscFree(svalues);CHKERRQ(ierr); 5698 5699 /* allocate buffers for sending j and a arrays */ 5700 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5701 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5702 5703 /* create i-array of B_oth */ 5704 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5705 5706 b_othi[0] = 0; 5707 len = 0; /* total length of j or a array to be received */ 5708 k = 0; 5709 for (i=0; i<nrecvs; i++) { 5710 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5711 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5712 for (j=0; j<nrows; j++) { 5713 b_othi[k+1] = b_othi[k] + rowlen[j]; 5714 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5715 k++; 5716 } 5717 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5718 } 5719 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5720 5721 /* allocate space for j and a arrays of B_oth */ 5722 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5723 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5724 5725 /* j-array */ 5726 /*---------*/ 5727 /* post receives of j-array */ 5728 for (i=0; i<nrecvs; i++) { 5729 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5730 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5731 } 5732 5733 /* pack the outgoing message j-array */ 5734 if (nsends) k = sstarts[0]; 5735 for (i=0; i<nsends; i++) { 5736 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5737 bufJ = bufj+sstartsj[i]; 5738 for (j=0; j<nrows; j++) { 5739 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5740 for (ll=0; ll<sbs; ll++) { 5741 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5742 for (l=0; l<ncols; l++) { 5743 *bufJ++ = cols[l]; 5744 } 5745 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5746 } 5747 } 5748 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5749 } 5750 5751 /* recvs and sends of j-array are completed */ 5752 i = nrecvs; 5753 while (i--) { 5754 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5755 } 5756 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5757 } else if (scall == MAT_REUSE_MATRIX) { 5758 sstartsj = *startsj_s; 5759 rstartsj = *startsj_r; 5760 bufa = *bufa_ptr; 5761 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5762 b_otha = b_oth->a; 5763 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5764 5765 /* a-array */ 5766 /*---------*/ 5767 /* post receives of a-array */ 5768 for (i=0; i<nrecvs; i++) { 5769 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5770 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5771 } 5772 5773 /* pack the outgoing message a-array */ 5774 if (nsends) k = sstarts[0]; 5775 for (i=0; i<nsends; i++) { 5776 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5777 bufA = bufa+sstartsj[i]; 5778 for (j=0; j<nrows; j++) { 5779 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5780 for (ll=0; ll<sbs; ll++) { 5781 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5782 for (l=0; l<ncols; l++) { 5783 *bufA++ = vals[l]; 5784 } 5785 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5786 } 5787 } 5788 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5789 } 5790 /* recvs and sends of a-array are completed */ 5791 i = nrecvs; 5792 while (i--) { 5793 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5794 } 5795 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5796 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5797 5798 if (scall == MAT_INITIAL_MATRIX) { 5799 /* put together the new matrix */ 5800 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5801 5802 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5803 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5804 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5805 b_oth->free_a = PETSC_TRUE; 5806 b_oth->free_ij = PETSC_TRUE; 5807 b_oth->nonew = 0; 5808 5809 ierr = PetscFree(bufj);CHKERRQ(ierr); 5810 if (!startsj_s || !bufa_ptr) { 5811 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5812 ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the temporary bufa itself; it is not returned to the caller in this case */ 5813 } else { 5814 *startsj_s = sstartsj; 5815 *startsj_r = rstartsj; 5816 *bufa_ptr = bufa; 5817 } 5818 } 5819 5820 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5821 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5822 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5823 PetscFunctionReturn(0); 5824 } 5825 5826 /*@C 5827 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5828 5829 Not Collective 5830 5831 Input Parameter: 5832 . A - The matrix in mpiaij format 5833 5834 Output Parameters: 5835 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5836 .
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

    Input Parameter:
.   A - The matrix in MPIAIJ format

    Output Parameters:
+   lvec - The local vector holding off-process values from the argument to a matrix-vector product
.   colmap - A map from global column index to local index into lvec
-   multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
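
/*
   A minimal usage sketch for MatGetCommunicationStructs(); it assumes a build
   without PETSC_USE_CTABLE (so colmap is a plain PetscInt array) and an
   assembled MPIAIJ matrix A. The returned objects are internal to A, so the
   caller must not destroy them:

     Vec            lvec;
     PetscInt       *colmap;
     VecScatter     Mvctx;
     PetscErrorCode ierr;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/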
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                       p                        p
    [             ]       [             ]        [                 ]
  m [      A      ]  *  n [      B      ]   =  m [        C        ]
    [             ]       [             ]        [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscBool      cisdense;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/
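
/*
   The two routines above are reached through PETSc's generic MatProduct
   interface rather than called directly. A hedged sketch of how a caller
   would form C = A*B for an MPIDENSE matrix A and an MPIAIJ matrix B
   (both assumed already assembled):

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatProductCreate(A,B,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_AB);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);
*/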
/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.

    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
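
/*
   A minimal sketch of the standard way to create and preallocate an MPIAIJ
   matrix; the global sizes M and N and the preallocation counts (5 diagonal
   and 2 off-diagonal nonzeros per row) are illustrative placeholders:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);    (or -mat_type mpiaij with MatSetFromOptions())
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/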
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based.

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
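
/*
   A hedged sketch of calling MatCreateMPIAIJWithSplitArrays() on rank 0 of a
   two-rank run for a 4x4 matrix (m = n = 2, M = N = 4). Rank 0 owns rows 0-1
   and columns 0-1, so j holds local column indices into the 2x2 "diagonal"
   block, while oj is interpreted against the full global column space (the
   off-diagonal block is created with N columns above). All values are
   illustrative, and the arrays must outlive the matrix:

     PetscInt       i[3]  = {0,1,2};     diagonal part: A(0,0) and A(1,1)
     PetscInt       j[2]  = {0,1};
     PetscScalar    a[2]  = {1.0,2.0};
     PetscInt       oi[3] = {0,1,1};     off-diagonal part: A(0,2) only
     PetscInt       oj[1] = {2};
     PetscScalar    oa[1] = {3.0};
     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/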

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted)
              B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}