#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->pinnedtocpu = flg;
#endif
  if (a->A) {
    ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
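  /* cnt is the number of locally empty rows (rows with no stored entries, or only zero values);
     sum it over all ranks so every process knows whether any rows need to be dropped at all */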
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
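  /* for NORM_2 the reduction above summed squared magnitudes per column; finish by taking the
     square root of each sum, e.g. contributions 9 and 16 from two ranks give sqrt(25) = 5 */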
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
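    /* ld[i], computed when the matrix was first distributed (MAT_INITIAL_MATRIX) and stored in the
       Mat_MPIAIJ, is the number of off-diagonal entries of local row i whose global column lies to
       the left of the diagonal block (column < rstart); it lets the copy below split each row of
       received values between the off-diagonal (B) and diagonal (A) parts without looking at the
       column indices again */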
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order N integer array but it is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol1) low1 = 0; \
  else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) { \
        ap1[_i] += value; \
        /* Not sure whether LogFlops will slow down the code or not */ \
        (void)PetscLogFlops(1.0); \
      } \
      else ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
  ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
  rp1[_i] = col; \
  ap1[_i] = value; \
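  /* no existing location matched, so a new nonzero was inserted above; record the pattern change */ \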
A->nonzerostate++;\ 481 a_noinsert: ; \ 482 ailen[row] = nrow1; \ 483 } 484 485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 486 { \ 487 if (col <= lastcol2) low2 = 0; \ 488 else high2 = nrow2; \ 489 lastcol2 = col; \ 490 while (high2-low2 > 5) { \ 491 t = (low2+high2)/2; \ 492 if (rp2[t] > col) high2 = t; \ 493 else low2 = t; \ 494 } \ 495 for (_i=low2; _i<high2; _i++) { \ 496 if (rp2[_i] > col) break; \ 497 if (rp2[_i] == col) { \ 498 if (addv == ADD_VALUES) { \ 499 ap2[_i] += value; \ 500 (void)PetscLogFlops(1.0); \ 501 } \ 502 else ap2[_i] = value; \ 503 goto b_noinsert; \ 504 } \ 505 } \ 506 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 507 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 508 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 509 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 510 N = nrow2++ - 1; b->nz++; high2++; \ 511 /* shift up all the later entries in this row */ \ 512 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 513 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 514 rp2[_i] = col; \ 515 ap2[_i] = value; \ 516 B->nonzerostate++; \ 517 b_noinsert: ; \ 518 bilen[row] = nrow2; \ 519 } 520 521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 522 { 523 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 525 PetscErrorCode ierr; 526 PetscInt l,*garray = mat->garray,diag; 527 528 PetscFunctionBegin; 529 /* code only works for square matrices A */ 530 531 /* find size of row to the left of the diagonal part */ 532 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 533 row = row - diag; 534 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 535 if (garray[b->j[b->i[row]+l]] > diag) break; 536 } 537 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 538 539 /* diagonal part */ 540 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 541 542 /* right of diagonal part */ 543 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 548 { 549 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 550 PetscScalar value = 0.0; 551 PetscErrorCode ierr; 552 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 553 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 554 PetscBool roworiented = aij->roworiented; 555 556 /* Some Variables required in the macro */ 557 Mat A = aij->A; 558 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 559 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 560 MatScalar *aa = a->a; 561 PetscBool ignorezeroentries = a->ignorezeroentries; 562 Mat B = aij->B; 563 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 564 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 565 MatScalar *ba = b->a; 566 567 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 568 PetscInt nonew; 569 MatScalar *ap1,*ap2; 570 571 PetscFunctionBegin; 572 for (i=0; i<m; i++) { 573 if (im[i] < 0) continue; 574 #if defined(PETSC_USE_DEBUG) 575 if (im[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 576 #endif 577 if (im[i] >= rstart && im[i] < rend) { 578 row = im[i] - rstart; 579 lastcol1 = -1; 580 rp1 = aj + ai[row]; 581 ap1 = aa + ai[row]; 582 rmax1 = aimax[row]; 583 nrow1 = ailen[row]; 584 low1 = 0; 585 high1 = nrow1; 586 lastcol2 = -1; 587 rp2 = bj + bi[row]; 588 ap2 = ba + bi[row]; 589 rmax2 = bimax[row]; 590 nrow2 = bilen[row]; 591 low2 = 0; 592 high2 = nrow2; 593 594 for (j=0; j<n; j++) { 595 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 596 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 597 if (in[j] >= cstart && in[j] < cend) { 598 col = in[j] - cstart; 599 nonew = a->nonew; 600 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 601 } else if (in[j] < 0) continue; 602 #if defined(PETSC_USE_DEBUG) 603 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 604 #endif 605 else { 606 if (mat->was_assembled) { 607 if (!aij->colmap) { 608 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 609 } 610 #if defined(PETSC_USE_CTABLE) 611 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 612 col--; 613 #else 614 col = aij->colmap[in[j]] - 1; 615 #endif 616 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 617 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 618 col = in[j]; 619 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 620 B = aij->B; 621 b = (Mat_SeqAIJ*)B->data; 622 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 623 rp2 = bj + bi[row]; 624 ap2 = ba + bi[row]; 625 rmax2 = bimax[row]; 626 nrow2 = bilen[row]; 627 low2 = 0; 628 high2 = nrow2; 629 bm = aij->B->rmap->n; 630 ba = b->a; 631 } else if (col < 0) { 632 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 633 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 634 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 635 } 636 } else col = in[j]; 637 nonew = b->nonew; 638 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 639 } 640 } 641 } else { 642 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 643 if (!aij->donotstash) { 644 mat->assembled = PETSC_FALSE; 645 if (roworiented) { 646 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 647 } else { 648 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 649 } 650 } 651 } 652 } 653 PetscFunctionReturn(0); 654 } 655 656 /* 657 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 658 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 659 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
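    Only the structural arrays (the column indices j and the per-row counts ilen) are filled here;
    the numerical values are copied later, e.g. by MatSetValues_MPIAIJ_CopyFromCSRFormat().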
660 */ 661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 662 { 663 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 664 Mat A = aij->A; /* diagonal part of the matrix */ 665 Mat B = aij->B; /* offdiagonal part of the matrix */ 666 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 667 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 668 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 669 PetscInt *ailen = a->ilen,*aj = a->j; 670 PetscInt *bilen = b->ilen,*bj = b->j; 671 PetscInt am = aij->A->rmap->n,j; 672 PetscInt diag_so_far = 0,dnz; 673 PetscInt offd_so_far = 0,onz; 674 675 PetscFunctionBegin; 676 /* Iterate over all rows of the matrix */ 677 for (j=0; j<am; j++) { 678 dnz = onz = 0; 679 /* Iterate over all non-zero columns of the current row */ 680 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 681 /* If column is in the diagonal */ 682 if (mat_j[col] >= cstart && mat_j[col] < cend) { 683 aj[diag_so_far++] = mat_j[col] - cstart; 684 dnz++; 685 } else { /* off-diagonal entries */ 686 bj[offd_so_far++] = mat_j[col]; 687 onz++; 688 } 689 } 690 ailen[j] = dnz; 691 bilen[j] = onz; 692 } 693 PetscFunctionReturn(0); 694 } 695 696 /* 697 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 698 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 699 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 700 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 701 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 702 */ 703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 704 { 705 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 706 Mat A = aij->A; /* diagonal part of the matrix */ 707 Mat B = aij->B; /* offdiagonal part of the matrix */ 708 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 710 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 711 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 712 PetscInt *ailen = a->ilen,*aj = a->j; 713 PetscInt *bilen = b->ilen,*bj = b->j; 714 PetscInt am = aij->A->rmap->n,j; 715 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 716 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 717 PetscScalar *aa = a->a,*ba = b->a; 718 719 PetscFunctionBegin; 720 /* Iterate over all rows of the matrix */ 721 for (j=0; j<am; j++) { 722 dnz_row = onz_row = 0; 723 rowstart_offd = full_offd_i[j]; 724 rowstart_diag = full_diag_i[j]; 725 /* Iterate over all non-zero columns of the current row */ 726 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 727 /* If column is in the diagonal */ 728 if (mat_j[col] >= cstart && mat_j[col] < cend) { 729 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 730 aa[rowstart_diag+dnz_row] = mat_a[col]; 731 dnz_row++; 732 } else { /* off-diagonal entries */ 733 bj[rowstart_offd+onz_row] = mat_j[col]; 734 ba[rowstart_offd+onz_row] = mat_a[col]; 735 onz_row++; 736 } 737 } 738 ailen[j] = dnz_row; 739 bilen[j] = onz_row; 740 } 741 PetscFunctionReturn(0); 742 } 743 744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 749 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 750 751 PetscFunctionBegin; 752 for (i=0; i<m; i++) { 753 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 754 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 755 if (idxm[i] >= rstart && idxm[i] < rend) { 756 row = idxm[i] - rstart; 757 for (j=0; j<n; j++) { 758 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 759 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 760 if (idxn[j] >= cstart && idxn[j] < cend) { 761 col = idxn[j] - cstart; 762 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 763 } else { 764 if (!aij->colmap) { 765 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 766 } 767 #if defined(PETSC_USE_CTABLE) 768 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 769 col--; 770 #else 771 col = aij->colmap[idxn[j]] - 1; 772 #endif 773 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 774 else { 775 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776 } 777 } 778 } 779 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 785 786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 787 { 788 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 789 PetscErrorCode ierr; 790 PetscInt nstash,reallocs; 791 792 PetscFunctionBegin; 793 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 794 795 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 796 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 797 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 805 PetscErrorCode ierr; 806 PetscMPIInt n; 807 PetscInt i,j,rstart,ncols,flg; 808 PetscInt *row,*col; 809 
PetscBool other_disassembled; 810 PetscScalar *val; 811 812 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 813 814 PetscFunctionBegin; 815 if (!aij->donotstash && !mat->nooffprocentries) { 816 while (1) { 817 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 818 if (!flg) break; 819 820 for (i=0; i<n; ) { 821 /* Now identify the consecutive vals belonging to the same row */ 822 for (j=i,rstart=row[j]; j<n; j++) { 823 if (row[j] != rstart) break; 824 } 825 if (j < n) ncols = j-i; 826 else ncols = n-i; 827 /* Now assemble all these values with a single function call */ 828 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 829 830 i = j; 831 } 832 } 833 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 834 } 835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 836 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 837 #endif 838 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 839 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 840 841 /* determine if any processor has disassembled, if so we must 842 also disassemble ourself, in order that we may reassemble. */ 843 /* 844 if nonzero structure of submatrix B cannot change then we know that 845 no processor disassembled thus we can skip this stuff 846 */ 847 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 848 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 849 if (mat->was_assembled && !other_disassembled) { 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 852 #endif 853 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 854 } 855 } 856 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 857 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 858 } 859 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 861 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 862 #endif 863 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 864 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 865 866 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 867 868 aij->rowvalues = 0; 869 870 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 871 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 872 873 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 874 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 875 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 876 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 877 } 878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 879 mat->offloadmask = PETSC_OFFLOAD_BOTH; 880 #endif 881 PetscFunctionReturn(0); 882 } 883 884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 885 { 886 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 891 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatZeroRows_MPIAIJ(Mat 
A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 896 { 897 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 898 PetscObjectState sA, sB; 899 PetscInt *lrows; 900 PetscInt r, len; 901 PetscBool cong, lch, gch; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 /* get locally owned rows */ 906 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 907 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 908 /* fix right hand side if needed */ 909 if (x && b) { 910 const PetscScalar *xx; 911 PetscScalar *bb; 912 913 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 914 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 916 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 917 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 918 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 919 } 920 921 sA = mat->A->nonzerostate; 922 sB = mat->B->nonzerostate; 923 924 if (diag != 0.0 && cong) { 925 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 926 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 927 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 928 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 929 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 930 PetscInt nnwA, nnwB; 931 PetscBool nnzA, nnzB; 932 933 nnwA = aijA->nonew; 934 nnwB = aijB->nonew; 935 nnzA = aijA->keepnonzeropattern; 936 nnzB = aijB->keepnonzeropattern; 937 if (!nnzA) { 938 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 939 aijA->nonew = 0; 940 } 941 if (!nnzB) { 942 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 943 aijB->nonew = 0; 944 } 945 /* Must zero here before the next loop */ 946 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 947 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 948 for (r = 0; r < len; ++r) { 949 const PetscInt row = lrows[r] + A->rmap->rstart; 950 if (row >= A->cmap->N) continue; 951 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 952 } 953 aijA->nonew = nnwA; 954 aijB->nonew = nnwB; 955 } else { 956 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 957 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 958 } 959 ierr = PetscFree(lrows);CHKERRQ(ierr); 960 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 961 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 963 /* reduce nonzerostate */ 964 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 965 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 966 if (gch) A->nonzerostate++; 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 971 { 972 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscMPIInt n = A->rmap->n; 975 PetscInt i,j,r,m,p = 0,len = 0; 976 PetscInt *lrows,*owners = A->rmap->range; 977 PetscSFNode *rrows; 978 PetscSF sf; 979 const PetscScalar *xx; 980 PetscScalar *bb,*mask; 981 Vec xmask,lmask; 982 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 983 const PetscInt 
*aj, *ii,*ridx; 984 PetscScalar *aa; 985 986 PetscFunctionBegin; 987 /* Create SF where leaves are input rows and roots are owned rows */ 988 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 989 for (r = 0; r < n; ++r) lrows[r] = -1; 990 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 991 for (r = 0; r < N; ++r) { 992 const PetscInt idx = rows[r]; 993 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 994 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 995 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 996 } 997 rrows[r].rank = p; 998 rrows[r].index = rows[r] - owners[p]; 999 } 1000 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1001 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1002 /* Collect flags for rows to be zeroed */ 1003 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1004 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1005 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1006 /* Compress and put in row numbers */ 1007 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1008 /* zero diagonal part of matrix */ 1009 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1010 /* handle off diagonal part of matrix */ 1011 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1012 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1013 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1014 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1015 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1016 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1017 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1018 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1019 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1020 PetscBool cong; 1021 1022 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1023 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1024 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1027 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1028 } 1029 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1030 /* remove zeroed rows of off diagonal matrix */ 1031 ii = aij->i; 1032 for (i=0; i<len; i++) { 1033 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1034 } 1035 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1036 if (aij->compressedrow.use) { 1037 m = aij->compressedrow.nrows; 1038 ii = aij->compressedrow.i; 1039 ridx = aij->compressedrow.rindex; 1040 for (i=0; i<m; i++) { 1041 n = ii[i+1] - ii[i]; 1042 aj = aij->j + ii[i]; 1043 aa = aij->a + ii[i]; 1044 1045 for (j=0; j<n; j++) { 1046 if (PetscAbsScalar(mask[*aj])) { 1047 if (b) bb[*ridx] -= *aa*xx[*aj]; 1048 *aa = 0.0; 1049 } 1050 aa++; 1051 aj++; 1052 } 1053 ridx++; 1054 } 1055 } else { /* do not use compressed row format */ 1056 m = l->B->rmap->n; 1057 for (i=0; i<m; i++) { 1058 n = ii[i+1] - ii[i]; 1059 aj = aij->j + ii[i]; 1060 aa = aij->a + ii[i]; 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[i] -= 
*aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 } 1070 } 1071 if (x && b) { 1072 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1073 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1074 } 1075 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1076 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1077 ierr = PetscFree(lrows);CHKERRQ(ierr); 1078 1079 /* only change matrix nonzero state if pattern was allowed to be changed */ 1080 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1081 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1082 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1088 { 1089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt nt; 1092 VecScatter Mvctx = a->Mvctx; 1093 1094 PetscFunctionBegin; 1095 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1096 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1097 1098 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1099 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1100 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1101 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1116 { 1117 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1118 PetscErrorCode ierr; 1119 VecScatter Mvctx = a->Mvctx; 1120 1121 PetscFunctionBegin; 1122 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1123 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1124 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1125 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1126 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 1135 PetscFunctionBegin; 1136 /* do nondiagonal part */ 1137 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* add partial results together */ 1141 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1147 { 1148 MPI_Comm comm; 1149 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1150 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1151 IS Me,Notme; 1152 PetscErrorCode ierr; 1153 PetscInt M,N,first,last,*notme,i; 1154 PetscBool lf; 1155 PetscMPIInt size; 1156 1157 PetscFunctionBegin; 1158 /* Easy test: symmetric diagonal block */ 1159 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1160 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 
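  /* lf is this process's verdict for its own diagonal block; a logical AND over all ranks makes *f
     true only if every diagonal block passes, and we return early if this cheap test already fails */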
1161 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1162 if (!*f) PetscFunctionReturn(0); 1163 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1164 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1165 if (size == 1) PetscFunctionReturn(0); 1166 1167 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1168 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1169 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1171 for (i=0; i<first; i++) notme[i] = i; 1172 for (i=last; i<M; i++) notme[i-last+first] = i; 1173 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1174 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1175 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1176 Aoff = Aoffs[0]; 1177 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1178 Boff = Boffs[0]; 1179 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1180 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1181 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1182 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1183 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1184 ierr = PetscFree(notme);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1189 { 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1198 { 1199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 /* do nondiagonal part */ 1204 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1205 /* do local part */ 1206 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1207 /* add partial results together */ 1208 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1218 { 1219 PetscErrorCode ierr; 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 1222 PetscFunctionBegin; 1223 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1224 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1225 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1230 { 1231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1236 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1241 { 1242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1243 PetscErrorCode ierr; 1244 1245 PetscFunctionBegin; 1246 #if defined(PETSC_USE_LOG) 1247 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 
1248 #endif 1249 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1250 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1251 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1252 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1253 #if defined(PETSC_USE_CTABLE) 1254 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1255 #else 1256 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1257 #endif 1258 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1259 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1260 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1261 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1262 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1263 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1264 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1265 1266 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1275 #if defined(PETSC_HAVE_ELEMENTAL) 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1277 #endif 1278 #if defined(PETSC_HAVE_HYPRE) 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1281 #endif 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1288 { 1289 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1290 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1291 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1292 PetscErrorCode ierr; 1293 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1294 int fd; 1295 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1296 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1297 PetscScalar *column_values; 1298 PetscInt message_count,flowcontrolcount; 1299 FILE *file; 1300 1301 PetscFunctionBegin; 1302 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1303 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1304 nz = A->nz + B->nz; 1305 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1306 if (!rank) { 1307 header[0] = MAT_FILE_CLASSID; 1308 header[1] = mat->rmap->N; 1309 header[2] = mat->cmap->N; 1310 1311 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1312 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1313 /* get 
largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the root process needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr =
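    /* MPIULong_Send() is used rather than a plain MPI_Send() since the count is a PetscInt that may
       be too large to describe with a single MPI message; it transmits the buffer in smaller chunks */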
MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1376 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1377 } 1378 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1379 1380 /* load up the local column values */ 1381 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1382 cnt = 0; 1383 for (i=0; i<mat->rmap->n; i++) { 1384 for (j=B->i[i]; j<B->i[i+1]; j++) { 1385 if (garray[B->j[j]] > cstart) break; 1386 column_values[cnt++] = B->a[j]; 1387 } 1388 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1389 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1390 } 1391 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1392 1393 /* store the column values to the file */ 1394 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1395 if (!rank) { 1396 MPI_Status status; 1397 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1398 for (i=1; i<size; i++) { 1399 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1400 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1401 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1402 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1403 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1404 } 1405 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1406 } else { 1407 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1408 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1409 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1410 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1411 } 1412 ierr = PetscFree(column_values);CHKERRQ(ierr); 1413 1414 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1415 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #include <petscdraw.h> 1420 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1421 { 1422 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1423 PetscErrorCode ierr; 1424 PetscMPIInt rank = aij->rank,size = aij->size; 1425 PetscBool isdraw,iascii,isbinary; 1426 PetscViewer sviewer; 1427 PetscViewerFormat format; 1428 1429 PetscFunctionBegin; 1430 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1433 if (iascii) { 1434 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1435 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1436 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1437 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1438 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1439 for (i=0; i<(PetscInt)size; i++) { 1440 nmax = PetscMax(nmax,nz[i]); 1441 nmin = 
PetscMin(nmin,nz[i]); 1442 navg += nz[i]; 1443 } 1444 ierr = PetscFree(nz);CHKERRQ(ierr); 1445 navg = navg/size; 1446 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1450 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1451 MatInfo info; 1452 PetscBool inodes; 1453 1454 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1455 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1456 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1458 if (!inodes) { 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1460 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1461 } else { 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1463 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1464 } 1465 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1467 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1469 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1470 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1472 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1475 PetscInt inodecount,inodelimit,*inodes; 1476 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1477 if (inodes) { 1478 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1479 } else { 1480 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1481 } 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1484 PetscFunctionReturn(0); 1485 } 1486 } else if (isbinary) { 1487 if (size == 1) { 1488 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1489 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1490 } else { 1491 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1492 } 1493 PetscFunctionReturn(0); 1494 } else if (iascii && size == 1) { 1495 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1496 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } else if (isdraw) { 1499 PetscDraw draw; 1500 PetscBool isnull; 1501 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1502 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1503 if (isnull) PetscFunctionReturn(0); 1504 } 1505 1506 { /* assemble the entire matrix onto first processor */ 1507 Mat A = NULL, Av; 1508 IS isrow,iscol; 1509 1510 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1511 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1512 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1513 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1514 /* The commented code uses MatCreateSubMatrices instead */ 1515 /* 1516 Mat *AA, A = NULL, Av; 1517 IS isrow,iscol; 1518 1519 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1520 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1521 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1522 if (!rank) { 1523 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1524 A = AA[0]; 1525 Av = AA[0]; 1526 } 1527 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1528 */ 1529 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1530 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1531 /* 1532 Everyone has to call to draw the matrix since the graphics waits are 1533 synchronized across all processors that share the PetscDraw object 1534 */ 1535 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1536 if (!rank) { 1537 if (((PetscObject)mat)->name) { 1538 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1539 } 1540 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1541 } 1542 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1543 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1544 ierr = MatDestroy(&A);CHKERRQ(ierr); 1545 } 1546 PetscFunctionReturn(0); 1547 } 1548 1549 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1550 { 1551 PetscErrorCode ierr; 1552 PetscBool iascii,isdraw,issocket,isbinary; 1553 1554 PetscFunctionBegin; 1555 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1556 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1557 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1558 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1559 if (iascii || isdraw || isbinary || issocket) { 1560 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1566 { 1567 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1568 PetscErrorCode ierr; 1569 Vec bb1 = 0; 1570 PetscBool hasop; 1571 1572 PetscFunctionBegin; 1573 if (flag == SOR_APPLY_UPPER) { 1574 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1579 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1580 } 1581 1582 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1583 if (flag & SOR_ZERO_INITIAL_GUESS) { 1584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1585 its--; 1586 } 1587 1588 while (its--) { 1589 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 1592 /* update rhs: bb1 = bb - B*x */ 1593 ierr = 
VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1594 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1595 1596 /* local sweep */ 1597 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1598 } 1599 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1600 if (flag & SOR_ZERO_INITIAL_GUESS) { 1601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1602 its--; 1603 } 1604 while (its--) { 1605 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 1608 /* update rhs: bb1 = bb - B*x */ 1609 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1610 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1611 1612 /* local sweep */ 1613 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1614 } 1615 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1616 if (flag & SOR_ZERO_INITIAL_GUESS) { 1617 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1618 its--; 1619 } 1620 while (its--) { 1621 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 1624 /* update rhs: bb1 = bb - B*x */ 1625 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1626 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1627 1628 /* local sweep */ 1629 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1630 } 1631 } else if (flag & SOR_EISENSTAT) { 1632 Vec xx1; 1633 1634 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1635 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1636 1637 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1638 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1639 if (!mat->diag) { 1640 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1641 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1642 } 1643 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1644 if (hasop) { 1645 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1646 } else { 1647 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1648 } 1649 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1650 1651 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1655 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1656 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1657 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1658 1659 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1660 1661 matin->factorerrortype = mat->A->factorerrortype; 1662 PetscFunctionReturn(0); 1663 } 1664 1665 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1666 { 1667 Mat aA,aB,Aperm; 1668 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1669 PetscScalar *aa,*ba; 1670 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1671 PetscSF rowsf,sf; 1672 IS parcolp = NULL; 1673 PetscBool done; 1674 PetscErrorCode ierr; 
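  /*
     Outline of the permutation algorithm below: (1) invert the row and column permutations
     with PetscSF reductions so each process learns the destination global index of each of
     its rows and columns; (2) broadcast the destination indices of the off-diagonal (ghost)
     columns through another PetscSF; (3) count diagonal and off-diagonal nonzeros per
     destination row to preallocate the permuted matrix; (4) insert the values with
     MatSetValues() and assemble.

     A minimal usage sketch from the caller's side (hypothetical code, not part of this
     routine), where 'perm' is some parallel IS describing the desired new ordering:

       Mat B;
       ierr = MatPermute(A,perm,perm,&B);CHKERRQ(ierr);
  */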
1675 1676 PetscFunctionBegin; 1677 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1678 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1679 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1680 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1681 1682 /* Invert row permutation to find out where my rows should go */ 1683 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1684 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1685 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1686 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1687 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1688 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1689 1690 /* Invert column permutation to find out where my columns should go */ 1691 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1692 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1693 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1694 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1695 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1696 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1697 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1698 1699 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1700 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1701 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1702 1703 /* Find out where my gcols should go */ 1704 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1705 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1706 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1707 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1708 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1709 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1710 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1711 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1712 1713 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1714 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1715 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1716 for (i=0; i<m; i++) { 1717 PetscInt row = rdest[i],rowner; 1718 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1719 for (j=ai[i]; j<ai[i+1]; j++) { 1720 PetscInt cowner,col = cdest[aj[j]]; 1721 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1722 if (rowner == cowner) dnnz[i]++; 1723 else onnz[i]++; 1724 } 1725 for (j=bi[i]; j<bi[i+1]; j++) { 1726 PetscInt cowner,col = gcdest[bj[j]]; 1727 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1728 if (rowner == cowner) dnnz[i]++; 1729 else onnz[i]++; 1730 } 1731 } 1732 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1733 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1734 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1735 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1737 1738 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1739 ierr = 
MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1740 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1741 for (i=0; i<m; i++) { 1742 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1743 PetscInt j0,rowlen; 1744 rowlen = ai[i+1] - ai[i]; 1745 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1746 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1747 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1748 } 1749 rowlen = bi[i+1] - bi[i]; 1750 for (j0=j=0; j<rowlen; j0=j) { 1751 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1752 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1753 } 1754 } 1755 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1756 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1757 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1758 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1759 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1760 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1761 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1762 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1763 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1764 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1765 *B = Aperm; 1766 PetscFunctionReturn(0); 1767 } 1768 1769 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1770 { 1771 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1772 PetscErrorCode ierr; 1773 1774 PetscFunctionBegin; 1775 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1776 if (ghosts) *ghosts = aij->garray; 1777 PetscFunctionReturn(0); 1778 } 1779 1780 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1781 { 1782 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1783 Mat A = mat->A,B = mat->B; 1784 PetscErrorCode ierr; 1785 PetscLogDouble isend[5],irecv[5]; 1786 1787 PetscFunctionBegin; 1788 info->block_size = 1.0; 1789 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1790 1791 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1792 isend[3] = info->memory; isend[4] = info->mallocs; 1793 1794 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1795 1796 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1797 isend[3] += info->memory; isend[4] += info->mallocs; 1798 if (flag == MAT_LOCAL) { 1799 info->nz_used = isend[0]; 1800 info->nz_allocated = isend[1]; 1801 info->nz_unneeded = isend[2]; 1802 info->memory = isend[3]; 1803 info->mallocs = isend[4]; 1804 } else if (flag == MAT_GLOBAL_MAX) { 1805 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1806 1807 info->nz_used = irecv[0]; 1808 info->nz_allocated = irecv[1]; 1809 info->nz_unneeded = irecv[2]; 1810 info->memory = irecv[3]; 1811 info->mallocs = irecv[4]; 1812 } else if (flag == MAT_GLOBAL_SUM) { 1813 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1814 1815 info->nz_used = irecv[0]; 1816 info->nz_allocated = irecv[1]; 1817 info->nz_unneeded = irecv[2]; 1818 info->memory = irecv[3]; 1819 info->mallocs = irecv[4]; 1820 } 1821 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1822 info->fill_ratio_needed = 0; 1823 
info->factor_mallocs = 0; 1824 PetscFunctionReturn(0); 1825 } 1826 1827 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1828 { 1829 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1830 PetscErrorCode ierr; 1831 1832 PetscFunctionBegin; 1833 switch (op) { 1834 case MAT_NEW_NONZERO_LOCATIONS: 1835 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1836 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1837 case MAT_KEEP_NONZERO_PATTERN: 1838 case MAT_NEW_NONZERO_LOCATION_ERR: 1839 case MAT_USE_INODES: 1840 case MAT_IGNORE_ZERO_ENTRIES: 1841 MatCheckPreallocated(A,1); 1842 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1843 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1844 break; 1845 case MAT_ROW_ORIENTED: 1846 MatCheckPreallocated(A,1); 1847 a->roworiented = flg; 1848 1849 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1850 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1851 break; 1852 case MAT_NEW_DIAGONALS: 1853 case MAT_SORTED_FULL: 1854 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1855 break; 1856 case MAT_IGNORE_OFF_PROC_ENTRIES: 1857 a->donotstash = flg; 1858 break; 1859 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1860 case MAT_SPD: 1861 case MAT_SYMMETRIC: 1862 case MAT_STRUCTURALLY_SYMMETRIC: 1863 case MAT_HERMITIAN: 1864 case MAT_SYMMETRY_ETERNAL: 1865 break; 1866 case MAT_SUBMAT_SINGLEIS: 1867 A->submat_singleis = flg; 1868 break; 1869 case MAT_STRUCTURE_ONLY: 1870 /* The option is handled directly by MatSetOption() */ 1871 break; 1872 default: 1873 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1874 } 1875 PetscFunctionReturn(0); 1876 } 1877 1878 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1879 { 1880 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1881 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1882 PetscErrorCode ierr; 1883 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1884 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1885 PetscInt *cmap,*idx_p; 1886 1887 PetscFunctionBegin; 1888 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1889 mat->getrowactive = PETSC_TRUE; 1890 1891 if (!mat->rowvalues && (idx || v)) { 1892 /* 1893 allocate enough space to hold information from the longest row. 
1894 */ 1895 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1896 PetscInt max = 1,tmp; 1897 for (i=0; i<matin->rmap->n; i++) { 1898 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1899 if (max < tmp) max = tmp; 1900 } 1901 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1902 } 1903 1904 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1905 lrow = row - rstart; 1906 1907 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1908 if (!v) {pvA = 0; pvB = 0;} 1909 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1910 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1911 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1912 nztot = nzA + nzB; 1913 1914 cmap = mat->garray; 1915 if (v || idx) { 1916 if (nztot) { 1917 /* Sort by increasing column numbers, assuming A and B already sorted */ 1918 PetscInt imark = -1; 1919 if (v) { 1920 *v = v_p = mat->rowvalues; 1921 for (i=0; i<nzB; i++) { 1922 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1923 else break; 1924 } 1925 imark = i; 1926 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1927 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1928 } 1929 if (idx) { 1930 *idx = idx_p = mat->rowindices; 1931 if (imark > -1) { 1932 for (i=0; i<imark; i++) { 1933 idx_p[i] = cmap[cworkB[i]]; 1934 } 1935 } else { 1936 for (i=0; i<nzB; i++) { 1937 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1938 else break; 1939 } 1940 imark = i; 1941 } 1942 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1943 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1944 } 1945 } else { 1946 if (idx) *idx = 0; 1947 if (v) *v = 0; 1948 } 1949 } 1950 *nz = nztot; 1951 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1957 { 1958 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1959 1960 PetscFunctionBegin; 1961 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1962 aij->getrowactive = PETSC_FALSE; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1967 { 1968 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1969 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1970 PetscErrorCode ierr; 1971 PetscInt i,j,cstart = mat->cmap->rstart; 1972 PetscReal sum = 0.0; 1973 MatScalar *v; 1974 1975 PetscFunctionBegin; 1976 if (aij->size == 1) { 1977 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1978 } else { 1979 if (type == NORM_FROBENIUS) { 1980 v = amat->a; 1981 for (i=0; i<amat->nz; i++) { 1982 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1983 } 1984 v = bmat->a; 1985 for (i=0; i<bmat->nz; i++) { 1986 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1987 } 1988 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1989 *norm = PetscSqrtReal(*norm); 1990 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1991 } else if (type == NORM_1) { /* max column norm */ 1992 PetscReal *tmp,*tmp2; 1993 PetscInt *jj,*garray = aij->garray; 1994 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1995 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1996 *norm = 0.0; 
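      /* NORM_1 is the largest column sum of |a_ij| over the global matrix: accumulate each
         local entry into a dense work array of length cmap->N (diagonal-block columns are
         shifted by cstart, off-diagonal columns are mapped to global columns through garray),
         sum the arrays across all processes, and take the maximum entry. */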
1997 v = amat->a; jj = amat->j; 1998 for (j=0; j<amat->nz; j++) { 1999 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2000 } 2001 v = bmat->a; jj = bmat->j; 2002 for (j=0; j<bmat->nz; j++) { 2003 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2004 } 2005 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2006 for (j=0; j<mat->cmap->N; j++) { 2007 if (tmp2[j] > *norm) *norm = tmp2[j]; 2008 } 2009 ierr = PetscFree(tmp);CHKERRQ(ierr); 2010 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2011 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2012 } else if (type == NORM_INFINITY) { /* max row norm */ 2013 PetscReal ntemp = 0.0; 2014 for (j=0; j<aij->A->rmap->n; j++) { 2015 v = amat->a + amat->i[j]; 2016 sum = 0.0; 2017 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2018 sum += PetscAbsScalar(*v); v++; 2019 } 2020 v = bmat->a + bmat->i[j]; 2021 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2022 sum += PetscAbsScalar(*v); v++; 2023 } 2024 if (sum > ntemp) ntemp = sum; 2025 } 2026 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2027 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2028 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2034 { 2035 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2036 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2037 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2038 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2039 PetscErrorCode ierr; 2040 Mat B,A_diag,*B_diag; 2041 const MatScalar *array; 2042 2043 PetscFunctionBegin; 2044 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2045 ai = Aloc->i; aj = Aloc->j; 2046 bi = Bloc->i; bj = Bloc->j; 2047 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2048 PetscInt *d_nnz,*g_nnz,*o_nnz; 2049 PetscSFNode *oloc; 2050 PETSC_UNUSED PetscSF sf; 2051 2052 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2053 /* compute d_nnz for preallocation */ 2054 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2055 for (i=0; i<ai[ma]; i++) { 2056 d_nnz[aj[i]]++; 2057 } 2058 /* compute local off-diagonal contributions */ 2059 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2060 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2061 /* map those to global */ 2062 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2063 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2064 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2065 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2066 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2067 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2068 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2069 2070 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2071 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2072 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2073 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2074 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2075 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2076 } else { 2077 B = *matout; 2078 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2079 } 2080 2081 b = (Mat_MPIAIJ*)B->data; 2082 A_diag = a->A; 2083 B_diag = &b->A; 2084 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2085 A_diag_ncol = A_diag->cmap->N; 2086 B_diag_ilen = sub_B_diag->ilen; 2087 B_diag_i = sub_B_diag->i; 2088 2089 /* Set ilen for diagonal of B */ 2090 for (i=0; i<A_diag_ncol; i++) { 2091 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2092 } 2093 2094 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2095 very quickly (=without using MatSetValues), because all writes are local. */ 2096 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2097 2098 /* copy over the B part */ 2099 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2100 array = Bloc->a; 2101 row = A->rmap->rstart; 2102 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2103 cols_tmp = cols; 2104 for (i=0; i<mb; i++) { 2105 ncol = bi[i+1]-bi[i]; 2106 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2107 row++; 2108 array += ncol; cols_tmp += ncol; 2109 } 2110 ierr = PetscFree(cols);CHKERRQ(ierr); 2111 2112 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2113 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2115 *matout = B; 2116 } else { 2117 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2123 { 2124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2125 Mat a = aij->A,b = aij->B; 2126 PetscErrorCode ierr; 2127 PetscInt s1,s2,s3; 2128 2129 PetscFunctionBegin; 2130 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2131 if (rr) { 2132 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2133 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2134 /* Overlap communication with computation. 
*/ 2135 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 } 2137 if (ll) { 2138 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2139 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2140 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2141 } 2142 /* scale the diagonal block */ 2143 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2144 2145 if (rr) { 2146 /* Do a scatter end and then right scale the off-diagonal block */ 2147 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2148 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2149 } 2150 PetscFunctionReturn(0); 2151 } 2152 2153 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2154 { 2155 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2156 PetscErrorCode ierr; 2157 2158 PetscFunctionBegin; 2159 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2164 { 2165 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2166 Mat a,b,c,d; 2167 PetscBool flg; 2168 PetscErrorCode ierr; 2169 2170 PetscFunctionBegin; 2171 a = matA->A; b = matA->B; 2172 c = matB->A; d = matB->B; 2173 2174 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2175 if (flg) { 2176 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2177 } 2178 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2183 { 2184 PetscErrorCode ierr; 2185 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2186 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2187 2188 PetscFunctionBegin; 2189 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2190 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2191 /* because of the column compression in the off-processor part of the matrix a->B, 2192 the number of columns in a->B and b->B may be different, hence we cannot call 2193 the MatCopy() directly on the two parts. If need be, we can provide a more 2194 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2195 then copying the submatrices */ 2196 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2197 } else { 2198 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2199 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2200 } 2201 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2206 { 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 /* 2215 Computes the number of nonzeros per row needed for preallocation when X and Y 2216 have different nonzero structure. 
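   For example (a sketch with made-up data): if row i of X has global columns {0,3,5}
   and row i of Y has global columns {1,3,7}, the merged row has columns {0,1,3,5,7},
   so nnz[i] = 5; the shared column 3 is counted only once. The loop below walks the
   two sorted column lists in tandem, comparing entries through the local-to-global
   maps xltog and yltog.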
2217 */ 2218 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2219 { 2220 PetscInt i,j,k,nzx,nzy; 2221 2222 PetscFunctionBegin; 2223 /* Set the number of nonzeros in the new matrix */ 2224 for (i=0; i<m; i++) { 2225 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2226 nzx = xi[i+1] - xi[i]; 2227 nzy = yi[i+1] - yi[i]; 2228 nnz[i] = 0; 2229 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2230 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2231 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2232 nnz[i]++; 2233 } 2234 for (; k<nzy; k++) nnz[i]++; 2235 } 2236 PetscFunctionReturn(0); 2237 } 2238 2239 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2240 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2241 { 2242 PetscErrorCode ierr; 2243 PetscInt m = Y->rmap->N; 2244 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2245 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2246 2247 PetscFunctionBegin; 2248 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2253 { 2254 PetscErrorCode ierr; 2255 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2256 PetscBLASInt bnz,one=1; 2257 Mat_SeqAIJ *x,*y; 2258 2259 PetscFunctionBegin; 2260 if (str == SAME_NONZERO_PATTERN) { 2261 PetscScalar alpha = a; 2262 x = (Mat_SeqAIJ*)xx->A->data; 2263 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2264 y = (Mat_SeqAIJ*)yy->A->data; 2265 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2266 x = (Mat_SeqAIJ*)xx->B->data; 2267 y = (Mat_SeqAIJ*)yy->B->data; 2268 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2269 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2270 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2271 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2272 will be updated */ 2273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2274 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2275 Y->offloadmask = PETSC_OFFLOAD_CPU; 2276 } 2277 #endif 2278 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2279 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2280 } else { 2281 Mat B; 2282 PetscInt *nnz_d,*nnz_o; 2283 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2284 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2285 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2286 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2287 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2288 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2289 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2290 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2291 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2292 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2293 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2294 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2295 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
2296 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2297 } 2298 PetscFunctionReturn(0); 2299 } 2300 2301 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2302 2303 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2304 { 2305 #if defined(PETSC_USE_COMPLEX) 2306 PetscErrorCode ierr; 2307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2308 2309 PetscFunctionBegin; 2310 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2311 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2312 #else 2313 PetscFunctionBegin; 2314 #endif 2315 PetscFunctionReturn(0); 2316 } 2317 2318 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2319 { 2320 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2321 PetscErrorCode ierr; 2322 2323 PetscFunctionBegin; 2324 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2325 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2330 { 2331 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2336 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2341 { 2342 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2343 PetscErrorCode ierr; 2344 PetscInt i,*idxb = 0; 2345 PetscScalar *va,*vb; 2346 Vec vtmp; 2347 2348 PetscFunctionBegin; 2349 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2350 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2351 if (idx) { 2352 for (i=0; i<A->rmap->n; i++) { 2353 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2354 } 2355 } 2356 2357 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2358 if (idx) { 2359 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2360 } 2361 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2362 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2363 2364 for (i=0; i<A->rmap->n; i++) { 2365 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2366 va[i] = vb[i]; 2367 if (idx) idx[i] = a->garray[idxb[i]]; 2368 } 2369 } 2370 2371 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2372 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2373 ierr = PetscFree(idxb);CHKERRQ(ierr); 2374 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2379 { 2380 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2381 PetscErrorCode ierr; 2382 PetscInt i,*idxb = 0; 2383 PetscScalar *va,*vb; 2384 Vec vtmp; 2385 2386 PetscFunctionBegin; 2387 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2388 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2389 if (idx) { 2390 for (i=0; i<A->rmap->n; i++) { 2391 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2392 } 2393 } 2394 2395 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2396 if (idx) { 2397 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2398 } 2399 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2400 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2401 2402 for (i=0; i<A->rmap->n; i++) { 2403 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2404 va[i] = vb[i]; 2405 if (idx) idx[i] = a->garray[idxb[i]]; 2406 } 2407 } 2408 2409 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2410 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2411 ierr = PetscFree(idxb);CHKERRQ(ierr); 2412 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2413 PetscFunctionReturn(0); 2414 } 2415 2416 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2417 { 2418 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data; 2419 PetscInt n = A->rmap->n; 2420 PetscInt cstart = A->cmap->rstart; 2421 PetscInt *cmap = mat->garray; 2422 PetscInt *diagIdx, *offdiagIdx; 2423 Vec diagV, offdiagV; 2424 PetscScalar *a, *diagA, *offdiagA; 2425 PetscInt r; 2426 PetscErrorCode ierr; 2427 2428 PetscFunctionBegin; 2429 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2430 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2431 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2432 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2433 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2434 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2435 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2436 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2437 for (r = 0; r < n; ++r) { 2438 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2439 a[r] = diagA[r]; 2440 idx[r] = cstart + diagIdx[r]; 2441 } else { 2442 a[r] = offdiagA[r]; 2443 idx[r] = cmap[offdiagIdx[r]]; 2444 } 2445 } 2446 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2447 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2449 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2450 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2451 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2456 { 2457 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2458 PetscInt n = A->rmap->n; 2459 PetscInt cstart = A->cmap->rstart; 2460 PetscInt *cmap = mat->garray; 2461 PetscInt *diagIdx, *offdiagIdx; 2462 Vec diagV, offdiagV; 2463 PetscScalar *a, *diagA, *offdiagA; 2464 PetscInt r; 2465 PetscErrorCode ierr; 2466 2467 PetscFunctionBegin; 2468 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2469 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2470 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2471 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2472 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2473 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2474 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2475 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2476 for (r = 0; r < n; ++r) { 2477 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2478 a[r] = diagA[r]; 2479 idx[r] = cstart + diagIdx[r]; 2480 } else { 2481 a[r] = offdiagA[r]; 2482 idx[r] = cmap[offdiagIdx[r]]; 2483 } 2484 } 2485 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2486 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2487 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2488 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2489 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2490 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2495 { 2496 PetscErrorCode ierr; 2497 Mat *dummy; 2498 2499 PetscFunctionBegin; 2500 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2501 *newmat = *dummy; 2502 ierr = PetscFree(dummy);CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2507 { 2508 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2509 PetscErrorCode ierr; 2510 2511 PetscFunctionBegin; 2512 ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2513 A->factorerrortype = a->A->factorerrortype; 2514 PetscFunctionReturn(0); 2515 } 2516 2517 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2518 { 2519 PetscErrorCode ierr; 2520 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2521 2522 PetscFunctionBegin; 2523 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2524 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2525 if (x->assembled) { 2526 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2527 } else { 2528 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2529 } 2530 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2531 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2536 { 2537 PetscFunctionBegin; 2538 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2539 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2540 PetscFunctionReturn(0); 2541 } 2542 2543 /*@ 2544 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2545 2546 Collective on Mat 2547 2548 Input Parameters: 2549 + A - the matrix 2550 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2551 2552 Level: advanced 2553 2554 @*/ 2555 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2556 { 2557 PetscErrorCode ierr; 2558 2559 PetscFunctionBegin; 2560 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2565 { 2566 PetscErrorCode ierr; 2567 PetscBool sc = PETSC_FALSE,flg; 2568 2569 PetscFunctionBegin; 2570 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2571 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2572 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2573 if (flg) { 2574 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2575 } 2576 ierr = PetscOptionsTail();CHKERRQ(ierr); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2581 { 2582 PetscErrorCode ierr; 2583 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2584 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2585 2586 PetscFunctionBegin; 2587 if (!Y->preallocated) { 2588 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2589 } else if (!aij->nz) { 2590 PetscInt nonew = aij->nonew; 2591 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2592 aij->nonew = nonew; 2593 } 2594 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2599 { 2600 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2601 PetscErrorCode ierr; 2602 2603 PetscFunctionBegin; 2604 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2605 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2606 if (d) { 2607 PetscInt rstart; 
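    /* MatMissingDiagonal() on the diagonal block reports a local row index; shift it by the
       start of this process's ownership range so the caller receives a global row number. */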
2608 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2609 *d += rstart; 2610 2611 } 2612 PetscFunctionReturn(0); 2613 } 2614 2615 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2616 { 2617 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2618 PetscErrorCode ierr; 2619 2620 PetscFunctionBegin; 2621 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2622 PetscFunctionReturn(0); 2623 } 2624 2625 /* -------------------------------------------------------------------*/ 2626 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2627 MatGetRow_MPIAIJ, 2628 MatRestoreRow_MPIAIJ, 2629 MatMult_MPIAIJ, 2630 /* 4*/ MatMultAdd_MPIAIJ, 2631 MatMultTranspose_MPIAIJ, 2632 MatMultTransposeAdd_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 /*10*/ 0, 2637 0, 2638 0, 2639 MatSOR_MPIAIJ, 2640 MatTranspose_MPIAIJ, 2641 /*15*/ MatGetInfo_MPIAIJ, 2642 MatEqual_MPIAIJ, 2643 MatGetDiagonal_MPIAIJ, 2644 MatDiagonalScale_MPIAIJ, 2645 MatNorm_MPIAIJ, 2646 /*20*/ MatAssemblyBegin_MPIAIJ, 2647 MatAssemblyEnd_MPIAIJ, 2648 MatSetOption_MPIAIJ, 2649 MatZeroEntries_MPIAIJ, 2650 /*24*/ MatZeroRows_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*29*/ MatSetUp_MPIAIJ, 2656 0, 2657 0, 2658 MatGetDiagonalBlock_MPIAIJ, 2659 0, 2660 /*34*/ MatDuplicate_MPIAIJ, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*39*/ MatAXPY_MPIAIJ, 2666 MatCreateSubMatrices_MPIAIJ, 2667 MatIncreaseOverlap_MPIAIJ, 2668 MatGetValues_MPIAIJ, 2669 MatCopy_MPIAIJ, 2670 /*44*/ MatGetRowMax_MPIAIJ, 2671 MatScale_MPIAIJ, 2672 MatShift_MPIAIJ, 2673 MatDiagonalSet_MPIAIJ, 2674 MatZeroRowsColumns_MPIAIJ, 2675 /*49*/ MatSetRandom_MPIAIJ, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*54*/ MatFDColoringCreate_MPIXAIJ, 2681 0, 2682 MatSetUnfactored_MPIAIJ, 2683 MatPermute_MPIAIJ, 2684 0, 2685 /*59*/ MatCreateSubMatrix_MPIAIJ, 2686 MatDestroy_MPIAIJ, 2687 MatView_MPIAIJ, 2688 0, 2689 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2690 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2691 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2692 0, 2693 0, 2694 0, 2695 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2696 MatGetRowMinAbs_MPIAIJ, 2697 0, 2698 0, 2699 0, 2700 0, 2701 /*75*/ MatFDColoringApply_AIJ, 2702 MatSetFromOptions_MPIAIJ, 2703 0, 2704 0, 2705 MatFindZeroDiagonals_MPIAIJ, 2706 /*80*/ 0, 2707 0, 2708 0, 2709 /*83*/ MatLoad_MPIAIJ, 2710 MatIsSymmetric_MPIAIJ, 2711 0, 2712 0, 2713 0, 2714 0, 2715 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2716 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2717 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2718 MatPtAP_MPIAIJ_MPIAIJ, 2719 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2720 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2721 0, 2722 0, 2723 0, 2724 MatPinToCPU_MPIAIJ, 2725 /*99*/ 0, 2726 0, 2727 0, 2728 MatConjugate_MPIAIJ, 2729 0, 2730 /*104*/MatSetValuesRow_MPIAIJ, 2731 MatRealPart_MPIAIJ, 2732 MatImaginaryPart_MPIAIJ, 2733 0, 2734 0, 2735 /*109*/0, 2736 0, 2737 MatGetRowMin_MPIAIJ, 2738 0, 2739 MatMissingDiagonal_MPIAIJ, 2740 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2741 0, 2742 MatGetGhosts_MPIAIJ, 2743 0, 2744 0, 2745 /*119*/0, 2746 0, 2747 0, 2748 0, 2749 MatGetMultiProcBlock_MPIAIJ, 2750 /*124*/MatFindNonzeroRows_MPIAIJ, 2751 MatGetColumnNorms_MPIAIJ, 2752 MatInvertBlockDiagonal_MPIAIJ, 2753 MatInvertVariableBlockDiagonal_MPIAIJ, 2754 MatCreateSubMatricesMPI_MPIAIJ, 2755 /*129*/0, 2756 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2757 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2758 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2759 0, 2760 /*134*/0, 2761 0, 2762 MatRARt_MPIAIJ_MPIAIJ, 2763 0, 2764 0, 2765 
/*139*/MatSetBlockSizes_MPIAIJ, 2766 0, 2767 0, 2768 MatFDColoringSetUp_MPIXAIJ, 2769 MatFindOffBlockDiagonalEntries_MPIAIJ, 2770 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2771 }; 2772 2773 /* ----------------------------------------------------------------------------------------*/ 2774 2775 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2776 { 2777 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2778 PetscErrorCode ierr; 2779 2780 PetscFunctionBegin; 2781 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2782 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2787 { 2788 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2793 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2798 { 2799 Mat_MPIAIJ *b; 2800 PetscErrorCode ierr; 2801 PetscMPIInt size; 2802 2803 PetscFunctionBegin; 2804 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2805 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2806 b = (Mat_MPIAIJ*)B->data; 2807 2808 #if defined(PETSC_USE_CTABLE) 2809 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2810 #else 2811 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2812 #endif 2813 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2814 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2815 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2816 2817 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2818 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2819 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2820 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2821 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2822 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2823 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2824 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2825 2826 if (!B->preallocated) { 2827 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2828 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2829 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2830 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2831 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2832 } 2833 2834 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2835 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2836 B->preallocated = PETSC_TRUE; 2837 B->was_assembled = PETSC_FALSE; 2838 B->assembled = PETSC_FALSE; 2839 PetscFunctionReturn(0); 2840 } 2841 2842 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2843 { 2844 Mat_MPIAIJ *b; 2845 PetscErrorCode ierr; 2846 2847 PetscFunctionBegin; 2848 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2849 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2850 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2851 b = (Mat_MPIAIJ*)B->data; 2852 2853 #if defined(PETSC_USE_CTABLE) 2854 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2855 #else 2856 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2857 #endif 2858 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2859 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2860 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2861 2862 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2863 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2864 B->preallocated = PETSC_TRUE; 2865 B->was_assembled = PETSC_FALSE; 2866 B->assembled = PETSC_FALSE; 2867 PetscFunctionReturn(0); 2868 } 2869 2870 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2871 { 2872 Mat mat; 2873 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2874 PetscErrorCode ierr; 2875 2876 PetscFunctionBegin; 2877 *newmat = 0; 2878 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2879 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2880 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2881 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2882 a = (Mat_MPIAIJ*)mat->data; 2883 2884 mat->factortype = matin->factortype; 2885 mat->assembled = PETSC_TRUE; 2886 mat->insertmode = NOT_SET_VALUES; 2887 mat->preallocated = PETSC_TRUE; 2888 2889 a->size = oldmat->size; 2890 a->rank = oldmat->rank; 2891 a->donotstash = oldmat->donotstash; 2892 a->roworiented = oldmat->roworiented; 2893 a->rowindices = 0; 2894 a->rowvalues = 0; 2895 a->getrowactive = PETSC_FALSE; 2896 2897 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2898 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2899 2900 if (oldmat->colmap) { 2901 #if defined(PETSC_USE_CTABLE) 2902 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2903 #else 2904 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2905 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2906 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2907 #endif 2908 } else a->colmap = 0; 2909 if (oldmat->garray) { 2910 PetscInt len; 2911 len = oldmat->B->cmap->n; 2912 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2913 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2914 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2915 } else a->garray = 0; 2916 2917 /* It may happen MatDuplicate is called with a non-assembled matrix 2918 In fact, MatDuplicate only requires the matrix to be preallocated 2919 This may happen inside a DMCreateMatrix_Shell */ 2920 if (oldmat->lvec) { 2921 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2922 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2923 } 2924 if (oldmat->Mvctx) { 2925 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2926 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2927 } 2928 if (oldmat->Mvctx_mpi1) { 2929 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2930 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2931 } 2932 2933 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2934 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2935 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2936 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2937 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2938 *newmat = mat; 2939 PetscFunctionReturn(0); 2940 } 2941 2942 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2943 { 2944 PetscBool isbinary, ishdf5; 2945 PetscErrorCode ierr; 2946 2947 PetscFunctionBegin; 2948 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2949 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2950 /* force binary viewer to load .info file if it has not yet done so */ 2951 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2952 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2953 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2954 if (isbinary) { 2955 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2956 } else if (ishdf5) { 2957 #if defined(PETSC_HAVE_HDF5) 2958 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2959 #else 2960 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2961 #endif 2962 } else { 2963 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2964 } 2965 PetscFunctionReturn(0); 2966 } 2967 2968 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2969 { 2970 PetscScalar *vals,*svals; 2971 MPI_Comm comm; 2972 PetscErrorCode ierr; 2973 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2974 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2975 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2976 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2977 PetscInt cend,cstart,n,*rowners; 2978 int fd; 2979 PetscInt bs = newMat->rmap->bs; 2980 2981 PetscFunctionBegin; 2982 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2983 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2984 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2985 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2986 if (!rank) { 2987 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2988 if (header[0] 
!= MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2989 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2990 } 2991 2992 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2993 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2994 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2995 if (bs < 0) bs = 1; 2996 2997 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2998 M = header[1]; N = header[2]; 2999 3000 /* If global sizes are set, check if they are consistent with that given in the file */ 3001 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 3002 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 3003 3004 /* determine ownership of all (block) rows */ 3005 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3006 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3007 else m = newMat->rmap->n; /* Set by user */ 3008 3009 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 3010 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3011 3012 /* First process needs enough room for process with most rows */ 3013 if (!rank) { 3014 mmax = rowners[1]; 3015 for (i=2; i<=size; i++) { 3016 mmax = PetscMax(mmax, rowners[i]); 3017 } 3018 } else mmax = -1; /* unused, but compilers complain */ 3019 3020 rowners[0] = 0; 3021 for (i=2; i<=size; i++) { 3022 rowners[i] += rowners[i-1]; 3023 } 3024 rstart = rowners[rank]; 3025 rend = rowners[rank+1]; 3026 3027 /* distribute row lengths to all processors */ 3028 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3029 if (!rank) { 3030 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3031 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3032 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3033 for (j=0; j<m; j++) { 3034 procsnz[0] += ourlens[j]; 3035 } 3036 for (i=1; i<size; i++) { 3037 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3038 /* calculate the number of nonzeros on each processor */ 3039 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3040 procsnz[i] += rowlengths[j]; 3041 } 3042 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3043 } 3044 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3045 } else { 3046 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3047 } 3048 3049 if (!rank) { 3050 /* determine max buffer needed and allocate it */ 3051 maxnz = 0; 3052 for (i=0; i<size; i++) { 3053 maxnz = PetscMax(maxnz,procsnz[i]); 3054 } 3055 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3056 3057 /* read in my part of the matrix column indices */ 3058 nz = procsnz[0]; 3059 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3060 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3061 3062 /* read in every one elses and ship off */ 3063 for (i=1; i<size; i++) { 3064 nz = procsnz[i]; 3065 ierr = 
PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3066 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3067 } 3068 ierr = PetscFree(cols);CHKERRQ(ierr); 3069 } else { 3070 /* determine buffer space needed for message */ 3071 nz = 0; 3072 for (i=0; i<m; i++) { 3073 nz += ourlens[i]; 3074 } 3075 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3076 3077 /* receive message of column indices*/ 3078 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3079 } 3080 3081 /* determine column ownership if matrix is not square */ 3082 if (N != M) { 3083 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3084 else n = newMat->cmap->n; 3085 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3086 cstart = cend - n; 3087 } else { 3088 cstart = rstart; 3089 cend = rend; 3090 n = cend - cstart; 3091 } 3092 3093 /* loop over local rows, determining number of off diagonal entries */ 3094 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3095 jj = 0; 3096 for (i=0; i<m; i++) { 3097 for (j=0; j<ourlens[i]; j++) { 3098 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3099 jj++; 3100 } 3101 } 3102 3103 for (i=0; i<m; i++) { 3104 ourlens[i] -= offlens[i]; 3105 } 3106 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3107 3108 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3109 3110 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3111 3112 for (i=0; i<m; i++) { 3113 ourlens[i] += offlens[i]; 3114 } 3115 3116 if (!rank) { 3117 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3118 3119 /* read in my part of the matrix numerical values */ 3120 nz = procsnz[0]; 3121 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3122 3123 /* insert into matrix */ 3124 jj = rstart; 3125 smycols = mycols; 3126 svals = vals; 3127 for (i=0; i<m; i++) { 3128 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3129 smycols += ourlens[i]; 3130 svals += ourlens[i]; 3131 jj++; 3132 } 3133 3134 /* read in other processors and ship out */ 3135 for (i=1; i<size; i++) { 3136 nz = procsnz[i]; 3137 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3138 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3139 } 3140 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3141 } else { 3142 /* receive numeric values */ 3143 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3144 3145 /* receive message of values*/ 3146 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3147 3148 /* insert into matrix */ 3149 jj = rstart; 3150 smycols = mycols; 3151 svals = vals; 3152 for (i=0; i<m; i++) { 3153 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3154 smycols += ourlens[i]; 3155 svals += ourlens[i]; 3156 jj++; 3157 } 3158 } 3159 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3160 ierr = PetscFree(vals);CHKERRQ(ierr); 3161 ierr = PetscFree(mycols);CHKERRQ(ierr); 3162 ierr = PetscFree(rowners);CHKERRQ(ierr); 3163 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3165 PetscFunctionReturn(0); 3166 } 3167 3168 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3169 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3170 { 3171 PetscErrorCode ierr; 3172 IS iscol_local; 3173 PetscBool isstride; 3174 PetscMPIInt lisstride=0,gisstride; 3175 3176 PetscFunctionBegin; 3177 /* check if we are grabbing all columns*/ 3178 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3179 3180 if (isstride) { 3181 PetscInt start,len,mstart,mlen; 3182 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3183 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3184 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3185 if (mstart == start && mlen-mstart == len) lisstride = 1; 3186 } 3187 3188 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3189 if (gisstride) { 3190 PetscInt N; 3191 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3192 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3193 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3194 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3195 } else { 3196 PetscInt cbs; 3197 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3198 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3199 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3200 } 3201 3202 *isseq = iscol_local; 3203 PetscFunctionReturn(0); 3204 } 3205 3206 /* 3207 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3208 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3209 3210 Input Parameters: 3211 mat - matrix 3212 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3213 i.e., mat->rstart <= isrow[i] < mat->rend 3214 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3215 i.e., mat->cstart <= iscol[i] < mat->cend 3216 Output Parameter: 3217 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3218 iscol_o - sequential column index set for retrieving mat->B 3219 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3220 */ 3221 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3222 { 3223 PetscErrorCode ierr; 3224 Vec x,cmap; 3225 const PetscInt *is_idx; 3226 PetscScalar *xarray,*cmaparray; 3227 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3228 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3229 Mat B=a->B; 3230 Vec lvec=a->lvec,lcmap; 3231 PetscInt i,cstart,cend,Bn=B->cmap->N; 3232 MPI_Comm comm; 3233 VecScatter Mvctx=a->Mvctx; 3234 3235 PetscFunctionBegin; 3236 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3237 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3238 3239 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3240 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3241 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3242 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3243 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3244 3245 /* Get start indices */ 3246 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3247 isstart -= ncols; 3248 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3249 3250 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3251 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3252 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3253 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3254 for (i=0; i<ncols; i++) { 3255 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3256 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3257 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3258 } 3259 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3260 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3261 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3262 3263 /* Get iscol_d */ 3264 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3265 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3266 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3267 3268 /* Get isrow_d */ 3269 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3270 rstart = mat->rmap->rstart; 3271 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3272 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3273 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3274 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3275 3276 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3277 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3278 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3279 3280 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3281 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3282 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3283 3284 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3285 3286 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3287 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3288 3289 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3290 /* off-process column indices */ 3291 count = 0; 3292 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3293 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3294 3295 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3296 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3297 for (i=0; i<Bn; i++) { 3298 if (PetscRealPart(xarray[i]) > -1.0) { 3299 idx[count] = i; /* local column index in off-diagonal part B */ 3300 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3301 count++; 3302 } 3303 } 3304 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3305 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3306 3307 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3308 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3309 3310 ierr = PetscFree(idx);CHKERRQ(ierr); 3311 *garray = cmap1; 3312 3313 ierr = VecDestroy(&x);CHKERRQ(ierr); 3314 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3315 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3316 PetscFunctionReturn(0); 3317 } 3318 3319 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3320 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3321 { 3322 PetscErrorCode ierr; 3323 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3324 Mat M = NULL; 3325 MPI_Comm comm; 3326 IS iscol_d,isrow_d,iscol_o; 3327 Mat Asub = NULL,Bsub = NULL; 3328 PetscInt n; 3329 3330 PetscFunctionBegin; 3331 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3332 3333 if (call == MAT_REUSE_MATRIX) { 3334 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3335 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3336 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3337 3338 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3339 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3340 3341 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3342 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3343 3344 /* Update diagonal and off-diagonal portions of submat */ 3345 asub = (Mat_MPIAIJ*)(*submat)->data; 3346 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3347 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3348 if (n) { 3349 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3350 } 3351 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3352 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3353 3354 } else { /* call == MAT_INITIAL_MATRIX) */ 3355 const PetscInt *garray; 3356 PetscInt BsubN; 3357 3358 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3359 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3360 3361 /* Create local submatrices Asub and Bsub */ 3362 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3363 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3364 3365 /* Create submatrix M */ 3366 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3367 3368 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3369 asub = (Mat_MPIAIJ*)M->data; 3370 3371 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3372 n = asub->B->cmap->N; 3373 if (BsubN > n) { 3374 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3375 const PetscInt *idx; 3376 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3377 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3378 3379 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3380 j = 0; 3381 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3382 for (i=0; i<n; i++) { 3383 if (j >= BsubN) break; 3384 while (subgarray[i] > garray[j]) j++; 3385 3386 if (subgarray[i] == garray[j]) { 3387 idx_new[i] = idx[j++]; 3388 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3389 } 3390 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3391 3392 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3393 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3394 3395 } else if (BsubN < n) { 3396 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3397 } 3398 3399 ierr = PetscFree(garray);CHKERRQ(ierr); 3400 *submat = M; 3401 3402 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3403 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3404 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3405 3406 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3407 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3408 3409 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3410 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3411 } 3412 PetscFunctionReturn(0); 3413 } 3414 3415 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3416 { 3417 PetscErrorCode ierr; 3418 IS iscol_local=NULL,isrow_d; 3419 PetscInt csize; 3420 PetscInt n,i,j,start,end; 3421 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3422 MPI_Comm comm; 3423 3424 PetscFunctionBegin; 3425 /* If isrow has same processor distribution as mat, 3426 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3427 if (call == MAT_REUSE_MATRIX) { 3428 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3429 if (isrow_d) { 3430 sameRowDist = PETSC_TRUE; 3431 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3432 } else { 3433 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3434 if (iscol_local) { 3435 sameRowDist = PETSC_TRUE; 3436 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3437 } 3438 } 3439 } else { 3440 /* Check if isrow has same processor distribution as mat */ 3441 sameDist[0] 
= PETSC_FALSE; 3442 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3443 if (!n) { 3444 sameDist[0] = PETSC_TRUE; 3445 } else { 3446 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3447 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3448 if (i >= start && j < end) { 3449 sameDist[0] = PETSC_TRUE; 3450 } 3451 } 3452 3453 /* Check if iscol has same processor distribution as mat */ 3454 sameDist[1] = PETSC_FALSE; 3455 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3456 if (!n) { 3457 sameDist[1] = PETSC_TRUE; 3458 } else { 3459 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3460 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3461 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3462 } 3463 3464 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3465 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3466 sameRowDist = tsameDist[0]; 3467 } 3468 3469 if (sameRowDist) { 3470 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3471 /* isrow and iscol have same processor distribution as mat */ 3472 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3473 PetscFunctionReturn(0); 3474 } else { /* sameRowDist */ 3475 /* isrow has same processor distribution as mat */ 3476 if (call == MAT_INITIAL_MATRIX) { 3477 PetscBool sorted; 3478 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3479 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3480 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3481 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3482 3483 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3484 if (sorted) { 3485 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3486 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3487 PetscFunctionReturn(0); 3488 } 3489 } else { /* call == MAT_REUSE_MATRIX */ 3490 IS iscol_sub; 3491 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3492 if (iscol_sub) { 3493 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3494 PetscFunctionReturn(0); 3495 } 3496 } 3497 } 3498 } 3499 3500 /* General case: iscol -> iscol_local which has global size of iscol */ 3501 if (call == MAT_REUSE_MATRIX) { 3502 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3503 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3504 } else { 3505 if (!iscol_local) { 3506 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3507 } 3508 } 3509 3510 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3511 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3512 3513 if (call == MAT_INITIAL_MATRIX) { 3514 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3515 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3516 } 3517 PetscFunctionReturn(0); 3518 } 3519 3520 /*@C 3521 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3522 and "off-diagonal" part of the matrix in CSR format. 3523 3524 Collective 3525 3526 Input Parameters: 3527 + comm - MPI communicator 3528 . 
A - "diagonal" portion of matrix 3529 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3530 - garray - global index of B columns 3531 3532 Output Parameter: 3533 . mat - the matrix, with input A as its local diagonal matrix 3534 Level: advanced 3535 3536 Notes: 3537 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3538 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3539 3540 .seealso: MatCreateMPIAIJWithSplitArrays() 3541 @*/ 3542 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3543 { 3544 PetscErrorCode ierr; 3545 Mat_MPIAIJ *maij; 3546 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3547 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3548 PetscScalar *oa=b->a; 3549 Mat Bnew; 3550 PetscInt m,n,N; 3551 3552 PetscFunctionBegin; 3553 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3554 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3555 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3556 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3557 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3558 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3559 3560 /* Get global columns of mat */ 3561 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3562 3563 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3564 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3565 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3566 maij = (Mat_MPIAIJ*)(*mat)->data; 3567 3568 (*mat)->preallocated = PETSC_TRUE; 3569 3570 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3571 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3572 3573 /* Set A as diagonal portion of *mat */ 3574 maij->A = A; 3575 3576 nz = oi[m]; 3577 for (i=0; i<nz; i++) { 3578 col = oj[i]; 3579 oj[i] = garray[col]; 3580 } 3581 3582 /* Set Bnew as off-diagonal portion of *mat */ 3583 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3584 bnew = (Mat_SeqAIJ*)Bnew->data; 3585 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3586 maij->B = Bnew; 3587 3588 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3589 3590 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3591 b->free_a = PETSC_FALSE; 3592 b->free_ij = PETSC_FALSE; 3593 ierr = MatDestroy(&B);CHKERRQ(ierr); 3594 3595 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3596 bnew->free_a = PETSC_TRUE; 3597 bnew->free_ij = PETSC_TRUE; 3598 3599 /* condense columns of maij->B */ 3600 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3601 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3602 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3603 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3604 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3605 PetscFunctionReturn(0); 3606 } 3607 3608 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3609 
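/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() defined above. The sizes and
   values are illustrative only and assume exactly two MPI processes; each process builds
   a 1x1 "diagonal" block and a 1x1 "off-diagonal" block with compact (local) column
   indices, and garray maps the compact column to the global column owned by the other
   process. Error checking is omitted for brevity.

     Mat         Adiag,Boff,C;
     PetscInt    garray[1],row = 0,col = 0;
     PetscScalar vdiag = 2.0,voff = -1.0;
     PetscMPIInt rank;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MatCreateSeqAIJ(PETSC_COMM_SELF,1,1,1,NULL,&Adiag);      // local "diagonal" block
     MatSetValues(Adiag,1,&row,1,&col,&vdiag,INSERT_VALUES);
     MatAssemblyBegin(Adiag,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(Adiag,MAT_FINAL_ASSEMBLY);
     MatCreateSeqAIJ(PETSC_COMM_SELF,1,1,1,NULL,&Boff);       // local "off-diagonal" block
     MatSetValues(Boff,1,&row,1,&col,&voff,INSERT_VALUES);
     MatAssemblyBegin(Boff,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(Boff,MAT_FINAL_ASSEMBLY);
     garray[0] = 1 - rank;                                    // global column of Boff's only column
     MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boff,garray,&C);
     // Adiag and Boff now belong to C and must not be used or destroyed by the caller
     MatDestroy(&C);
*/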
3610 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3611 { 3612 PetscErrorCode ierr; 3613 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3614 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3615 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3616 Mat M,Msub,B=a->B; 3617 MatScalar *aa; 3618 Mat_SeqAIJ *aij; 3619 PetscInt *garray = a->garray,*colsub,Ncols; 3620 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3621 IS iscol_sub,iscmap; 3622 const PetscInt *is_idx,*cmap; 3623 PetscBool allcolumns=PETSC_FALSE; 3624 MPI_Comm comm; 3625 3626 PetscFunctionBegin; 3627 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3628 3629 if (call == MAT_REUSE_MATRIX) { 3630 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3631 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3632 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3633 3634 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3635 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3636 3637 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3638 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3639 3640 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3641 3642 } else { /* call == MAT_INITIAL_MATRIX) */ 3643 PetscBool flg; 3644 3645 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3646 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3647 3648 /* (1) iscol -> nonscalable iscol_local */ 3649 /* Check for special case: each processor gets entire matrix columns */ 3650 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3651 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3652 if (allcolumns) { 3653 iscol_sub = iscol_local; 3654 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3655 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3656 3657 } else { 3658 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3659 PetscInt *idx,*cmap1,k; 3660 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3661 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3662 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3663 count = 0; 3664 k = 0; 3665 for (i=0; i<Ncols; i++) { 3666 j = is_idx[i]; 3667 if (j >= cstart && j < cend) { 3668 /* diagonal part of mat */ 3669 idx[count] = j; 3670 cmap1[count++] = i; /* column index in submat */ 3671 } else if (Bn) { 3672 /* off-diagonal part of mat */ 3673 if (j == garray[k]) { 3674 idx[count] = j; 3675 cmap1[count++] = i; /* column index in submat */ 3676 } else if (j > garray[k]) { 3677 while (j > garray[k] && k < Bn-1) k++; 3678 if (j == garray[k]) { 3679 idx[count] = j; 3680 cmap1[count++] = i; /* column index in submat */ 3681 } 3682 } 3683 } 3684 } 3685 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3686 3687 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3688 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3689 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3690 3691 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3692 } 3693 3694 /* (3) Create sequential Msub */ 3695 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3696 } 3697 3698 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3699 aij = (Mat_SeqAIJ*)(Msub)->data; 3700 ii = aij->i; 3701 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3702 3703 /* 3704 m - number of local rows 3705 Ncols - number of columns (same on all processors) 3706 rstart - first row in new global matrix generated 3707 */ 3708 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3709 3710 if (call == MAT_INITIAL_MATRIX) { 3711 /* (4) Create parallel newmat */ 3712 PetscMPIInt rank,size; 3713 PetscInt csize; 3714 3715 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3716 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3717 3718 /* 3719 Determine the number of non-zeros in the diagonal and off-diagonal 3720 portions of the matrix in order to do correct preallocation 3721 */ 3722 3723 /* first get start and end of "diagonal" columns */ 3724 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3725 if (csize == PETSC_DECIDE) { 3726 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3727 if (mglobal == Ncols) { /* square matrix */ 3728 nlocal = m; 3729 } else { 3730 nlocal = Ncols/size + ((Ncols % size) > rank); 3731 } 3732 } else { 3733 nlocal = csize; 3734 } 3735 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3736 rstart = rend - nlocal; 3737 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3738 3739 /* next, compute all the lengths */ 3740 jj = aij->j; 3741 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3742 olens = dlens + m; 3743 for (i=0; i<m; i++) { 3744 jend = ii[i+1] - ii[i]; 3745 olen = 0; 3746 dlen = 0; 3747 for (j=0; j<jend; j++) { 3748 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3749 else dlen++; 3750 jj++; 3751 } 3752 olens[i] = olen; 3753 dlens[i] = dlen; 3754 } 3755 3756 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3757 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3758 3759 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3760 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3761 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3762 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3763 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3764 ierr = PetscFree(dlens);CHKERRQ(ierr); 3765 3766 } else { /* call == MAT_REUSE_MATRIX */ 3767 M = *newmat; 3768 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3769 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3770 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3771 /* 3772 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3773 rather than the slower MatSetValues(). 3774 */ 3775 M->was_assembled = PETSC_TRUE; 3776 M->assembled = PETSC_FALSE; 3777 } 3778 3779 /* (5) Set values of Msub to *newmat */ 3780 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3781 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3782 3783 jj = aij->j; 3784 aa = aij->a; 3785 for (i=0; i<m; i++) { 3786 row = rstart + i; 3787 nz = ii[i+1] - ii[i]; 3788 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3789 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3790 jj += nz; aa += nz; 3791 } 3792 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3793 3794 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3795 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3796 3797 ierr = PetscFree(colsub);CHKERRQ(ierr); 3798 3799 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3800 if (call == MAT_INITIAL_MATRIX) { 3801 *newmat = M; 3802 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3803 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3804 3805 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3806 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3807 3808 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3809 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3810 3811 if (iscol_local) { 3812 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3813 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3814 } 3815 } 3816 PetscFunctionReturn(0); 3817 } 3818 3819 /* 3820 Not great since it makes two copies of the submatrix, first an SeqAIJ 3821 in local and then by concatenating the local matrices the end result. 3822 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3823 3824 Note: This requires a sequential iscol with all indices. 
3825 */ 3826 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3827 { 3828 PetscErrorCode ierr; 3829 PetscMPIInt rank,size; 3830 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3831 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3832 Mat M,Mreuse; 3833 MatScalar *aa,*vwork; 3834 MPI_Comm comm; 3835 Mat_SeqAIJ *aij; 3836 PetscBool colflag,allcolumns=PETSC_FALSE; 3837 3838 PetscFunctionBegin; 3839 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3840 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3841 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3842 3843 /* Check for special case: each processor gets entire matrix columns */ 3844 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3845 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3846 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3847 3848 if (call == MAT_REUSE_MATRIX) { 3849 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3850 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3851 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3852 } else { 3853 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3854 } 3855 3856 /* 3857 m - number of local rows 3858 n - number of columns (same on all processors) 3859 rstart - first row in new global matrix generated 3860 */ 3861 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3862 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3863 if (call == MAT_INITIAL_MATRIX) { 3864 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3865 ii = aij->i; 3866 jj = aij->j; 3867 3868 /* 3869 Determine the number of non-zeros in the diagonal and off-diagonal 3870 portions of the matrix in order to do correct preallocation 3871 */ 3872 3873 /* first get start and end of "diagonal" columns */ 3874 if (csize == PETSC_DECIDE) { 3875 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3876 if (mglobal == n) { /* square matrix */ 3877 nlocal = m; 3878 } else { 3879 nlocal = n/size + ((n % size) > rank); 3880 } 3881 } else { 3882 nlocal = csize; 3883 } 3884 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3885 rstart = rend - nlocal; 3886 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3887 3888 /* next, compute all the lengths */ 3889 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3890 olens = dlens + m; 3891 for (i=0; i<m; i++) { 3892 jend = ii[i+1] - ii[i]; 3893 olen = 0; 3894 dlen = 0; 3895 for (j=0; j<jend; j++) { 3896 if (*jj < rstart || *jj >= rend) olen++; 3897 else dlen++; 3898 jj++; 3899 } 3900 olens[i] = olen; 3901 dlens[i] = dlen; 3902 } 3903 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3904 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3905 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3906 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3907 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3908 ierr = PetscFree(dlens);CHKERRQ(ierr); 3909 } else { 3910 PetscInt ml,nl; 3911 3912 M = *newmat; 3913 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3914 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3915 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3916 /* 3917 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3918 rather than the slower MatSetValues(). 3919 */ 3920 M->was_assembled = PETSC_TRUE; 3921 M->assembled = PETSC_FALSE; 3922 } 3923 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3924 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3925 ii = aij->i; 3926 jj = aij->j; 3927 aa = aij->a; 3928 for (i=0; i<m; i++) { 3929 row = rstart + i; 3930 nz = ii[i+1] - ii[i]; 3931 cwork = jj; jj += nz; 3932 vwork = aa; aa += nz; 3933 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3934 } 3935 3936 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3937 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3938 *newmat = M; 3939 3940 /* save submatrix used in processor for next request */ 3941 if (call == MAT_INITIAL_MATRIX) { 3942 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3943 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3944 } 3945 PetscFunctionReturn(0); 3946 } 3947 3948 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3949 { 3950 PetscInt m,cstart, cend,j,nnz,i,d; 3951 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3952 const PetscInt *JJ; 3953 PetscErrorCode ierr; 3954 PetscBool nooffprocentries; 3955 3956 PetscFunctionBegin; 3957 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3958 3959 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3960 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3961 m = B->rmap->n; 3962 cstart = B->cmap->rstart; 3963 cend = B->cmap->rend; 3964 rstart = B->rmap->rstart; 3965 3966 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3967 3968 #if defined(PETSC_USE_DEBUG) 3969 for (i=0; i<m; i++) { 3970 nnz = Ii[i+1]- Ii[i]; 3971 JJ = J + Ii[i]; 3972 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3973 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3974 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3975 } 3976 #endif 3977 3978 for (i=0; i<m; i++) { 3979 nnz = Ii[i+1]- Ii[i]; 3980 JJ = J + Ii[i]; 3981 nnz_max = PetscMax(nnz_max,nnz); 3982 d = 0; 3983 for (j=0; j<nnz; j++) { 3984 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3985 } 3986 d_nnz[i] = d; 3987 o_nnz[i] = nnz - d; 3988 } 3989 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3990 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3991 3992 for (i=0; i<m; i++) { 3993 ii = i + rstart; 3994 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3995 } 3996 nooffprocentries = B->nooffprocentries; 3997 B->nooffprocentries = PETSC_TRUE; 3998 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3999 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4000 B->nooffprocentries = nooffprocentries; 4001 4002 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4003 PetscFunctionReturn(0); 4004 } 4005 4006 /*@ 4007 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4008 (the default parallel PETSc format). 
4009 4010 Collective 4011 4012 Input Parameters: 4013 + B - the matrix 4014 . i - the indices into j for the start of each local row (starts with zero) 4015 . j - the column indices for each local row (starts with zero) 4016 - v - optional values in the matrix 4017 4018 Level: developer 4019 4020 Notes: 4021 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4022 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4023 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4024 4025 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4026 4027 The format which is used for the sparse matrix input, is equivalent to a 4028 row-major ordering.. i.e for the following matrix, the input data expected is 4029 as shown 4030 4031 $ 1 0 0 4032 $ 2 0 3 P0 4033 $ ------- 4034 $ 4 5 6 P1 4035 $ 4036 $ Process0 [P0]: rows_owned=[0,1] 4037 $ i = {0,1,3} [size = nrow+1 = 2+1] 4038 $ j = {0,0,2} [size = 3] 4039 $ v = {1,2,3} [size = 3] 4040 $ 4041 $ Process1 [P1]: rows_owned=[2] 4042 $ i = {0,3} [size = nrow+1 = 1+1] 4043 $ j = {0,1,2} [size = 3] 4044 $ v = {4,5,6} [size = 3] 4045 4046 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4047 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4048 @*/ 4049 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4050 { 4051 PetscErrorCode ierr; 4052 4053 PetscFunctionBegin; 4054 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4055 PetscFunctionReturn(0); 4056 } 4057 4058 /*@C 4059 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4060 (the default parallel PETSc format). For good matrix assembly performance 4061 the user should preallocate the matrix storage by setting the parameters 4062 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4063 performance can be increased by more than a factor of 50. 4064 4065 Collective 4066 4067 Input Parameters: 4068 + B - the matrix 4069 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4070 (same value is used for all local rows) 4071 . d_nnz - array containing the number of nonzeros in the various rows of the 4072 DIAGONAL portion of the local submatrix (possibly different for each row) 4073 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4074 The size of this array is equal to the number of local rows, i.e 'm'. 4075 For matrices that will be factored, you must leave room for (and set) 4076 the diagonal entry even if it is zero. 4077 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4078 submatrix (same value is used for all local rows). 4079 - o_nnz - array containing the number of nonzeros in the various rows of the 4080 OFF-DIAGONAL portion of the local submatrix (possibly different for 4081 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4082 structure. The size of this array is equal to the number 4083 of local rows, i.e 'm'. 
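
   For example (a minimal sketch), to reserve room for at most 3 nonzeros per row in the
   DIAGONAL portion and at most 2 nonzeros per row in the OFF-DIAGONAL portion of the
   local submatrix, one can call
.vb
     MatMPIAIJSetPreallocation(B,3,NULL,2,NULL);
.ve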
4084
4085    If the *_nnz parameter is given then the *_nz parameter is ignored
4086
4087    The AIJ format (also called the Yale sparse matrix format or
4088    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4089    storage.  The stored row and column indices begin with zero.
4090    See Users-Manual: ch_mat for details.
4091
4092    The parallel matrix is partitioned such that the first m0 rows belong to
4093    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4094    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4095
4096    The DIAGONAL portion of the local submatrix of a processor can be defined
4097    as the submatrix which is obtained by extracting the part corresponding to
4098    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4099    first row that belongs to the processor, r2 is the last row belonging to
4100    this processor, and c1-c2 is the range of indices of the local part of a
4101    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4102    common case of a square matrix, the row and column ranges are the same and
4103    the DIAGONAL part is also square. The remaining portion of the local
4104    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4105
4106    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4107
4108    You can call MatGetInfo() to get information on how effective the preallocation was;
4109    for example, the fields mallocs, nz_allocated, nz_used, nz_unneeded;
4110    you can also run with the option -info and look for messages with the string
4111    malloc in them to see if additional memory allocation was needed.
4112
4113    Example usage:
4114
4115    Consider the following 8x8 matrix with 34 non-zero values, that is
4116    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4117    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4118    as follows:
4119
4120 .vb
4121             1  2  0  |  0  3  0  |  0  4
4122     Proc0   0  5  6  |  7  0  0  |  8  0
4123             9  0 10  | 11  0  0  | 12  0
4124     -------------------------------------
4125            13  0 14  | 15 16 17  |  0  0
4126     Proc1   0 18  0  | 19 20 21  |  0  0
4127             0  0  0  | 22 23  0  | 24  0
4128     -------------------------------------
4129     Proc2  25 26 27  |  0  0 28  | 29  0
4130            30  0  0  | 31 32 33  |  0 34
4131 .ve
4132
4133    This can be represented as a collection of submatrices as:
4134
4135 .vb
4136       A B C
4137       D E F
4138       G H I
4139 .ve
4140
4141    Where the submatrices A,B,C are owned by proc0, D,E,F are
4142    owned by proc1, G,H,I are owned by proc2.
4143
4144    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4145    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4146    The 'M','N' parameters are 8,8, and have the same values on all procs.
4147
4148    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4149    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4150    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4151    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4152    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4153    matrix and [DF] as another SeqAIJ matrix.
4154
4155    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4156    allocated for every row of the local DIAGONAL submatrix, and o_nz
4157    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4158    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4159    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4160    In this case, the values of d_nz, o_nz are:
4161 .vb
4162      proc0 : d_nz = 2, o_nz = 2
4163      proc1 : d_nz = 3, o_nz = 2
4164      proc2 : d_nz = 1, o_nz = 4
4165 .ve
4166    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4167    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4168    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4169    34 values.
4170
4171    When the d_nnz, o_nnz parameters are specified, the storage is specified
4172    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4173    In the above case the values for d_nnz, o_nnz are:
4174 .vb
4175      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4176      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4177      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4178 .ve
4179    Here the space allocated is the sum of all of the above values, i.e., 34, and
4180    hence the preallocation is perfect.
4181
4182    Level: intermediate
4183
4184 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4185           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4186 @*/
4187 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4188 {
4189   PetscErrorCode ierr;
4190
4191   PetscFunctionBegin;
4192   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4193   PetscValidType(B,1);
4194   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4195   PetscFunctionReturn(0);
4196 }
4197
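/*
   A minimal creation sketch using the recommended MatCreate()/MatSetType()/
   MatMPIAIJSetPreallocation() sequence. The sizes and per-row counts are illustrative
   only; they correspond to proc1 of the 8x8 example in the manual page above when run
   on 3 processes. Error checking is omitted for brevity.

     Mat      A;
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                        // 3 local rows/columns of an 8x8 matrix
     MatSetType(A,MATAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);  // exact per-row preallocation
     // ... insert entries with MatSetValues(), then assemble
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/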
4198 /*@
4199    MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
4200    in standard CSR format.
4201
4202    Collective
4203
4204    Input Parameters:
4205 +  comm - MPI communicator
4206 .  m - number of local rows (Cannot be PETSC_DECIDE)
4207 .  n - This value should be the same as the local size used in creating the
4208        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4209        calculated if N is given) For square matrices n is almost always m.
4210 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4211 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4212 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4213 .  j - column indices
4214 -  a - matrix values
4215
4216    Output Parameter:
4217 .  mat - the matrix
4218
4219    Level: intermediate
4220
4221    Notes:
4222    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4223    thus you CANNOT change the matrix entries by changing the values of a[] after you have
4224    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4225
4226    The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.
4227
4228    The format which is used for the sparse matrix input is equivalent to a
4229    row-major ordering, i.e., for the following matrix the input data expected is
4230    as shown below.
4231
4232    Once you have created the matrix, you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4233
4234 $        1 0 0
4235 $        2 0 3     P0
4236 $       -------
4237 $        4 5 6     P1
4238 $
4239 $     Process0 [P0]: rows_owned=[0,1]
4240 $        i =  {0,1,3}  [size = nrow+1 = 2+1]
4241 $        j =  {0,0,2}  [size = 3]
4242 $        v =  {1,2,3}  [size = 3]
4243 $
4244 $     Process1 [P1]: rows_owned=[2]
4245 $        i =  {0,3}    [size = nrow+1 = 1+1]
4246 $        j =  {0,1,2}  [size = 3]
4247 $        v =  {4,5,6}  [size = 3]
4248
4249 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4250           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4251 @*/
4252 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4253 {
4254   PetscErrorCode ierr;
4255
4256   PetscFunctionBegin;
4257   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4258   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4259   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4260   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4261   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4262   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4263   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4264   PetscFunctionReturn(0);
4265 }
4266
4267 /*@
4268    MatUpdateMPIAIJWithArrays - updates an MPIAIJ matrix using arrays that contain the local rows
4269    in standard CSR format. Only the numerical values are updated; the other arrays must be identical to the ones used when the matrix was created.
4270
4271    Collective
4272
4273    Input Parameters:
4274 +  mat - the matrix
4275 .  m - number of local rows (Cannot be PETSC_DECIDE)
4276 .  n - This value should be the same as the local size used in creating the
4277        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4278        calculated if N is given) For square matrices n is almost always m.
4279 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4280 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4281 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4282 .
J - column indices 4283 - v - matrix values 4284 4285 Level: intermediate 4286 4287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4288 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4289 @*/ 4290 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4291 { 4292 PetscErrorCode ierr; 4293 PetscInt cstart,nnz,i,j; 4294 PetscInt *ld; 4295 PetscBool nooffprocentries; 4296 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4297 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4298 PetscScalar *ad = Ad->a, *ao = Ao->a; 4299 const PetscInt *Adi = Ad->i; 4300 PetscInt ldi,Iii,md; 4301 4302 PetscFunctionBegin; 4303 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4304 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4305 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4306 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4307 4308 cstart = mat->cmap->rstart; 4309 if (!Aij->ld) { 4310 /* count number of entries below block diagonal */ 4311 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4312 Aij->ld = ld; 4313 for (i=0; i<m; i++) { 4314 nnz = Ii[i+1]- Ii[i]; 4315 j = 0; 4316 while (J[j] < cstart && j < nnz) {j++;} 4317 J += nnz; 4318 ld[i] = j; 4319 } 4320 } else { 4321 ld = Aij->ld; 4322 } 4323 4324 for (i=0; i<m; i++) { 4325 nnz = Ii[i+1]- Ii[i]; 4326 Iii = Ii[i]; 4327 ldi = ld[i]; 4328 md = Adi[i+1]-Adi[i]; 4329 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4330 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4331 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4332 ad += md; 4333 ao += nnz - md; 4334 } 4335 nooffprocentries = mat->nooffprocentries; 4336 mat->nooffprocentries = PETSC_TRUE; 4337 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4338 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4339 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4342 mat->nooffprocentries = nooffprocentries; 4343 PetscFunctionReturn(0); 4344 } 4345 4346 /*@C 4347 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4348 (the default parallel PETSc format). For good matrix assembly performance 4349 the user should preallocate the matrix storage by setting the parameters 4350 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4351 performance can be increased by more than a factor of 50. 4352 4353 Collective 4354 4355 Input Parameters: 4356 + comm - MPI communicator 4357 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4358 This value should be the same as the local size used in creating the 4359 y vector for the matrix-vector product y = Ax. 4360 . n - This value should be the same as the local size used in creating the 4361 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4362 calculated if N is given) For square matrices n is almost always m. 4363 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4364 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4365 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4366 (same value is used for all local rows) 4367 . d_nnz - array containing the number of nonzeros in the various rows of the 4368 DIAGONAL portion of the local submatrix (possibly different for each row) 4369 or NULL, if d_nz is used to specify the nonzero structure. 4370 The size of this array is equal to the number of local rows, i.e 'm'. 4371 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4372 submatrix (same value is used for all local rows). 4373 - o_nnz - array containing the number of nonzeros in the various rows of the 4374 OFF-DIAGONAL portion of the local submatrix (possibly different for 4375 each row) or NULL, if o_nz is used to specify the nonzero 4376 structure. The size of this array is equal to the number 4377 of local rows, i.e 'm'. 4378 4379 Output Parameter: 4380 . A - the matrix 4381 4382 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4383 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4384 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4385 4386 Notes: 4387 If the *_nnz parameter is given then the *_nz parameter is ignored 4388 4389 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4390 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4391 storage requirements for this matrix. 4392 4393 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4394 processor than it must be used on all processors that share the object for 4395 that argument. 4396 4397 The user MUST specify either the local or global matrix dimensions 4398 (possibly both). 4399 4400 The parallel matrix is partitioned across processors such that the 4401 first m0 rows belong to process 0, the next m1 rows belong to 4402 process 1, the next m2 rows belong to process 2 etc.. where 4403 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4404 values corresponding to [m x N] submatrix. 4405 4406 The columns are logically partitioned with the n0 columns belonging 4407 to 0th partition, the next n1 columns belonging to the next 4408 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4409 4410 The DIAGONAL portion of the local submatrix on any given processor 4411 is the submatrix corresponding to the rows and columns m,n 4412 corresponding to the given processor. i.e diagonal matrix on 4413 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4414 etc. The remaining portion of the local submatrix [m x (N-n)] 4415 constitute the OFF-DIAGONAL portion. The example below better 4416 illustrates this concept. 4417 4418 For a square global matrix we define each processor's diagonal portion 4419 to be its local rows and the corresponding columns (a square submatrix); 4420 each processor's off-diagonal portion encompasses the remainder of the 4421 local matrix (a rectangular submatrix). 4422 4423 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4424 4425 When calling this routine with a single process communicator, a matrix of 4426 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
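
   As a concrete sketch of the d_nnz/o_nnz variant (the array values below are simply the
   proc1 numbers from the example above and would normally be computed by the application;
   comm stands for whatever communicator the matrix is to live on), the call on proc1 could be
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      A;
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve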
4515 4516 Level: intermediate 4517 4518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4519 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4520 @*/ 4521 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4522 { 4523 PetscErrorCode ierr; 4524 PetscMPIInt size; 4525 4526 PetscFunctionBegin; 4527 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4528 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4530 if (size > 1) { 4531 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4532 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4533 } else { 4534 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4535 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4536 } 4537 PetscFunctionReturn(0); 4538 } 4539 4540 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4541 { 4542 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4543 PetscBool flg; 4544 PetscErrorCode ierr; 4545 4546 PetscFunctionBegin; 4547 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4548 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4549 if (Ad) *Ad = a->A; 4550 if (Ao) *Ao = a->B; 4551 if (colmap) *colmap = a->garray; 4552 PetscFunctionReturn(0); 4553 } 4554 4555 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4556 { 4557 PetscErrorCode ierr; 4558 PetscInt m,N,i,rstart,nnz,Ii; 4559 PetscInt *indx; 4560 PetscScalar *values; 4561 4562 PetscFunctionBegin; 4563 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4564 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4565 PetscInt *dnz,*onz,sum,bs,cbs; 4566 4567 if (n == PETSC_DECIDE) { 4568 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4569 } 4570 /* Check sum(n) = N */ 4571 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4572 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4573 4574 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4575 rstart -= m; 4576 4577 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4578 for (i=0; i<m; i++) { 4579 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4580 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4581 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4582 } 4583 4584 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4585 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4586 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4587 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4588 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4589 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4590 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4591 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4592 } 4593 4594 /* numeric phase */ 4595 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4596 for (i=0; i<m; i++) { 4597 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4598 Ii = i + rstart; 4599 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4600 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4601 } 4602 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4603 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4604 PetscFunctionReturn(0); 4605 } 4606 4607 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4608 { 4609 PetscErrorCode ierr; 4610 PetscMPIInt rank; 4611 PetscInt m,N,i,rstart,nnz; 4612 size_t len; 4613 const PetscInt *indx; 4614 PetscViewer out; 4615 char *name; 4616 Mat B; 4617 const PetscScalar *values; 4618 4619 PetscFunctionBegin; 4620 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4621 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4622 /* Should this be the type of the diagonal block of A? */ 4623 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4624 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4625 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4626 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4627 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4628 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4629 for (i=0; i<m; i++) { 4630 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4631 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4632 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4633 } 4634 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4635 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4636 4637 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4638 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4639 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4640 sprintf(name,"%s.%d",outfile,rank); 4641 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4642 ierr = PetscFree(name);CHKERRQ(ierr); 4643 ierr = MatView(B,out);CHKERRQ(ierr); 4644 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4645 ierr = MatDestroy(&B);CHKERRQ(ierr); 4646 PetscFunctionReturn(0); 4647 } 4648 4649 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4650 { 4651 PetscErrorCode ierr; 4652 Mat_Merge_SeqsToMPI *merge; 4653 PetscContainer container; 4654 4655 PetscFunctionBegin; 4656 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4657 if (container) { 4658 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4659 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4660 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4661 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4662 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4663 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4664 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4665 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4666 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4667 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4668 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4669 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4670 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4671 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4672 ierr = PetscFree(merge);CHKERRQ(ierr); 4673 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4674 } 4675 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4676 PetscFunctionReturn(0); 4677 } 4678 4679 #include <../src/mat/utils/freespace.h> 4680 #include <petscbt.h> 4681 4682 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4683 { 4684 PetscErrorCode ierr; 4685 MPI_Comm comm; 4686 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4687 PetscMPIInt 
size,rank,taga,*len_s; 4688 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4689 PetscInt proc,m; 4690 PetscInt **buf_ri,**buf_rj; 4691 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4692 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4693 MPI_Request *s_waits,*r_waits; 4694 MPI_Status *status; 4695 MatScalar *aa=a->a; 4696 MatScalar **abuf_r,*ba_i; 4697 Mat_Merge_SeqsToMPI *merge; 4698 PetscContainer container; 4699 4700 PetscFunctionBegin; 4701 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4702 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4703 4704 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4705 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4706 4707 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4708 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4709 4710 bi = merge->bi; 4711 bj = merge->bj; 4712 buf_ri = merge->buf_ri; 4713 buf_rj = merge->buf_rj; 4714 4715 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4716 owners = merge->rowmap->range; 4717 len_s = merge->len_s; 4718 4719 /* send and recv matrix values */ 4720 /*-----------------------------*/ 4721 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4722 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4723 4724 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4725 for (proc=0,k=0; proc<size; proc++) { 4726 if (!len_s[proc]) continue; 4727 i = owners[proc]; 4728 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4729 k++; 4730 } 4731 4732 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4733 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4734 ierr = PetscFree(status);CHKERRQ(ierr); 4735 4736 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4737 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4738 4739 /* insert mat values of mpimat */ 4740 /*----------------------------*/ 4741 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4742 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4743 4744 for (k=0; k<merge->nrecv; k++) { 4745 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4746 nrows = *(buf_ri_k[k]); 4747 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4748 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4749 } 4750 4751 /* set values of ba */ 4752 m = merge->rowmap->n; 4753 for (i=0; i<m; i++) { 4754 arow = owners[rank] + i; 4755 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4756 bnzi = bi[i+1] - bi[i]; 4757 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4758 4759 /* add local non-zero vals of this proc's seqmat into ba */ 4760 anzi = ai[arow+1] - ai[arow]; 4761 aj = a->j + ai[arow]; 4762 aa = a->a + ai[arow]; 4763 nextaj = 0; 4764 for (j=0; nextaj<anzi; j++) { 4765 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4766 ba_i[j] += aa[nextaj++]; 4767 } 4768 } 4769 4770 /* add received vals into ba */ 4771 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4772 /* i-th row */ 4773 if (i == *nextrow[k]) { 4774 anzi = *(nextai[k]+1) - *nextai[k]; 4775 aj = buf_rj[k] + *(nextai[k]); 4776 aa = abuf_r[k] + *(nextai[k]); 4777 nextaj = 0; 4778 for (j=0; nextaj<anzi; j++) { 4779 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4780 
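          /* matching column found: accumulate the received value; aj[] is sorted and is a
             subsequence of bj_i, so j never needs to move backward while nextaj catches up */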
ba_i[j] += aa[nextaj++]; 4781 } 4782 } 4783 nextrow[k]++; nextai[k]++; 4784 } 4785 } 4786 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4787 } 4788 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4789 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4790 4791 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4792 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4793 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4794 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4795 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4796 PetscFunctionReturn(0); 4797 } 4798 4799 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4800 { 4801 PetscErrorCode ierr; 4802 Mat B_mpi; 4803 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4804 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4805 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4806 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4807 PetscInt len,proc,*dnz,*onz,bs,cbs; 4808 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4809 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4810 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4811 MPI_Status *status; 4812 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4813 PetscBT lnkbt; 4814 Mat_Merge_SeqsToMPI *merge; 4815 PetscContainer container; 4816 4817 PetscFunctionBegin; 4818 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4819 4820 /* make sure it is a PETSc comm */ 4821 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4822 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4823 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4824 4825 ierr = PetscNew(&merge);CHKERRQ(ierr); 4826 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4827 4828 /* determine row ownership */ 4829 /*---------------------------------------------------------*/ 4830 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4831 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4832 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4833 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4834 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4835 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4836 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4837 4838 m = merge->rowmap->n; 4839 owners = merge->rowmap->range; 4840 4841 /* determine the number of messages to send, their lengths */ 4842 /*---------------------------------------------------------*/ 4843 len_s = merge->len_s; 4844 4845 len = 0; /* length of buf_si[] */ 4846 merge->nsend = 0; 4847 for (proc=0; proc<size; proc++) { 4848 len_si[proc] = 0; 4849 if (proc == rank) { 4850 len_s[proc] = 0; 4851 } else { 4852 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4853 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4854 } 4855 if (len_s[proc]) { 4856 merge->nsend++; 4857 nrows = 0; 4858 for (i=owners[proc]; i<owners[proc+1]; i++) { 4859 if (ai[i+1] > ai[i]) nrows++; 4860 } 4861 len_si[proc] = 2*(nrows+1); 4862 len += len_si[proc]; 4863 } 4864 } 4865 4866 /* determine the number and length of messages to receive for ij-structure */ 4867 /*-------------------------------------------------------------------------*/ 4868 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4869 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4870 4871 /* post the Irecv of j-structure */ 4872 /*-------------------------------*/ 4873 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4874 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4875 4876 /* post the Isend of j-structure */ 4877 /*--------------------------------*/ 4878 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4879 4880 for (proc=0, k=0; proc<size; proc++) { 4881 if (!len_s[proc]) continue; 4882 i = owners[proc]; 4883 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4884 k++; 4885 } 4886 4887 /* receives and sends of j-structure are complete */ 4888 /*------------------------------------------------*/ 4889 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4890 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4891 4892 /* send and recv i-structure */ 4893 /*---------------------------*/ 4894 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4895 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4896 4897 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4898 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4899 for (proc=0,k=0; proc<size; proc++) { 4900 if (!len_s[proc]) continue; 4901 /* form outgoing message for i-structure: 4902 buf_si[0]: nrows to be sent 4903 [1:nrows]: row index (global) 4904 [nrows+1:2*nrows+1]: i-structure index 4905 */ 4906 /*-------------------------------------------*/ 4907 nrows = len_si[proc]/2 - 1; 4908 buf_si_i = buf_si + nrows+1; 4909 buf_si[0] = nrows; 4910 buf_si_i[0] = 0; 4911 nrows = 0; 4912 for (i=owners[proc]; i<owners[proc+1]; i++) { 4913 anzi = ai[i+1] - ai[i]; 4914 if (anzi) { 4915 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4916 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4917 nrows++; 4918 } 4919 } 4920 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4921 k++; 4922 buf_si += len_si[proc]; 4923 } 4924 4925 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4926 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4927 4928 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4929 for (i=0; i<merge->nrecv; i++) { 4930 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4931 } 4932 4933 ierr = PetscFree(len_si);CHKERRQ(ierr); 4934 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4935 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4936 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4937 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4938 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4939 ierr = PetscFree(status);CHKERRQ(ierr); 4940 4941 /* compute a local seq matrix in each processor */ 4942 /*----------------------------------------------*/ 4943 /* allocate bi array and free space for accumulating nonzero column info */ 4944 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4945 bi[0] = 0; 4946 4947 /* create and initialize a linked list */ 4948 nlnk = N+1; 4949 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4950 4951 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4952 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4953 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4954 4955 current_space = free_space; 4956 4957 /* determine symbolic info for each local row */ 4958 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4959 4960 for (k=0; k<merge->nrecv; k++) { 4961 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4962 nrows = *buf_ri_k[k]; 4963 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4964 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4965 } 4966 4967 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4968 len = 0; 4969 for (i=0; i<m; i++) { 4970 bnzi = 0; 4971 /* add local non-zero cols of this proc's seqmat into lnk */ 4972 arow = owners[rank] + i; 4973 anzi = ai[arow+1] - ai[arow]; 4974 aj = a->j + ai[arow]; 4975 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4976 bnzi += nlnk; 4977 /* add received col data into lnk */ 4978 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4979 if (i == *nextrow[k]) { /* i-th row */ 4980 anzi = *(nextai[k]+1) - *nextai[k]; 4981 aj = buf_rj[k] + *nextai[k]; 4982 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4983 bnzi += nlnk; 4984 nextrow[k]++; nextai[k]++; 4985 } 4986 } 4987 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4988 4989 /* if free space is not available, make more free space */ 4990 if (current_space->local_remaining<bnzi) { 4991 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4992 nspacedouble++; 4993 } 4994 /* copy data into free space, then initialize lnk */ 4995 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4996 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4997 4998 current_space->array += bnzi; 4999 current_space->local_used += bnzi; 5000 current_space->local_remaining -= bnzi; 5001 5002 bi[i+1] = bi[i] + bnzi; 5003 } 5004 5005 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5006 5007 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5008 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5009 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5010 5011 /* create symbolic parallel matrix B_mpi */ 5012 /*---------------------------------------*/ 5013 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5014 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5015 if (n==PETSC_DECIDE) { 5016 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5017 } else { 5018 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5019 } 5020 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5021 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5022 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5023 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5024 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5025 5026 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5027 B_mpi->assembled = PETSC_FALSE; 5028 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5029 merge->bi = bi; 5030 merge->bj = bj; 5031 merge->buf_ri = buf_ri; 5032 merge->buf_rj = buf_rj; 5033 merge->coi = NULL; 5034 merge->coj = NULL; 5035 merge->owners_co = NULL; 5036 5037 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5038 5039 /* attach the 
supporting struct to B_mpi for reuse */ 5040 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5041 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5042 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5043 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5044 *mpimat = B_mpi; 5045 5046 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5047 PetscFunctionReturn(0); 5048 } 5049 5050 /*@C 5051 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5052 matrices from each processor 5053 5054 Collective 5055 5056 Input Parameters: 5057 + comm - the communicators the parallel matrix will live on 5058 . seqmat - the input sequential matrices 5059 . m - number of local rows (or PETSC_DECIDE) 5060 . n - number of local columns (or PETSC_DECIDE) 5061 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5062 5063 Output Parameter: 5064 . mpimat - the parallel matrix generated 5065 5066 Level: advanced 5067 5068 Notes: 5069 The dimensions of the sequential matrix in each processor MUST be the same. 5070 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5071 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5072 @*/ 5073 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5074 { 5075 PetscErrorCode ierr; 5076 PetscMPIInt size; 5077 5078 PetscFunctionBegin; 5079 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5080 if (size == 1) { 5081 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5082 if (scall == MAT_INITIAL_MATRIX) { 5083 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5084 } else { 5085 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5086 } 5087 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5088 PetscFunctionReturn(0); 5089 } 5090 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5091 if (scall == MAT_INITIAL_MATRIX) { 5092 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5093 } 5094 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5095 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5096 PetscFunctionReturn(0); 5097 } 5098 5099 /*@ 5100 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5101 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5102 with MatGetSize() 5103 5104 Not Collective 5105 5106 Input Parameters: 5107 + A - the matrix 5108 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5109 5110 Output Parameter: 5111 . 
A_loc - the local sequential matrix generated 5112 5113 Level: developer 5114 5115 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5116 5117 @*/ 5118 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5119 { 5120 PetscErrorCode ierr; 5121 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5122 Mat_SeqAIJ *mat,*a,*b; 5123 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5124 MatScalar *aa,*ba,*cam; 5125 PetscScalar *ca; 5126 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5127 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5128 PetscBool match; 5129 MPI_Comm comm; 5130 PetscMPIInt size; 5131 5132 PetscFunctionBegin; 5133 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5134 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5135 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5136 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5137 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5138 5139 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5140 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5141 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5142 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5143 aa = a->a; ba = b->a; 5144 if (scall == MAT_INITIAL_MATRIX) { 5145 if (size == 1) { 5146 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5151 ci[0] = 0; 5152 for (i=0; i<am; i++) { 5153 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5154 } 5155 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5156 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5157 k = 0; 5158 for (i=0; i<am; i++) { 5159 ncols_o = bi[i+1] - bi[i]; 5160 ncols_d = ai[i+1] - ai[i]; 5161 /* off-diagonal portion of A */ 5162 for (jo=0; jo<ncols_o; jo++) { 5163 col = cmap[*bj]; 5164 if (col >= cstart) break; 5165 cj[k] = col; bj++; 5166 ca[k++] = *ba++; 5167 } 5168 /* diagonal portion of A */ 5169 for (j=0; j<ncols_d; j++) { 5170 cj[k] = cstart + *aj++; 5171 ca[k++] = *aa++; 5172 } 5173 /* off-diagonal portion of A */ 5174 for (j=jo; j<ncols_o; j++) { 5175 cj[k] = cmap[*bj++]; 5176 ca[k++] = *ba++; 5177 } 5178 } 5179 /* put together the new matrix */ 5180 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5181 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5182 /* Since these are PETSc arrays, change flags to free them as necessary. 
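     The i (ci), j (cj), and a (ca) arrays were allocated above with PetscMalloc1(), so the
     destructor of *A_loc is allowed to free them.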
*/ 5183 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5184 mat->free_a = PETSC_TRUE; 5185 mat->free_ij = PETSC_TRUE; 5186 mat->nonew = 0; 5187 } else if (scall == MAT_REUSE_MATRIX) { 5188 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5189 ci = mat->i; cj = mat->j; cam = mat->a; 5190 for (i=0; i<am; i++) { 5191 /* off-diagonal portion of A */ 5192 ncols_o = bi[i+1] - bi[i]; 5193 for (jo=0; jo<ncols_o; jo++) { 5194 col = cmap[*bj]; 5195 if (col >= cstart) break; 5196 *cam++ = *ba++; bj++; 5197 } 5198 /* diagonal portion of A */ 5199 ncols_d = ai[i+1] - ai[i]; 5200 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5201 /* off-diagonal portion of A */ 5202 for (j=jo; j<ncols_o; j++) { 5203 *cam++ = *ba++; bj++; 5204 } 5205 } 5206 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5207 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5208 PetscFunctionReturn(0); 5209 } 5210 5211 /*@C 5212 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5213 5214 Not Collective 5215 5216 Input Parameters: 5217 + A - the matrix 5218 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5219 - row, col - index sets of rows and columns to extract (or NULL) 5220 5221 Output Parameter: 5222 . A_loc - the local sequential matrix generated 5223 5224 Level: developer 5225 5226 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5227 5228 @*/ 5229 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5230 { 5231 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5232 PetscErrorCode ierr; 5233 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5234 IS isrowa,iscola; 5235 Mat *aloc; 5236 PetscBool match; 5237 5238 PetscFunctionBegin; 5239 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5240 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5241 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5242 if (!row) { 5243 start = A->rmap->rstart; end = A->rmap->rend; 5244 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5245 } else { 5246 isrowa = *row; 5247 } 5248 if (!col) { 5249 start = A->cmap->rstart; 5250 cmap = a->garray; 5251 nzA = a->A->cmap->n; 5252 nzB = a->B->cmap->n; 5253 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5254 ncols = 0; 5255 for (i=0; i<nzB; i++) { 5256 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5257 else break; 5258 } 5259 imark = i; 5260 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5261 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5262 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5263 } else { 5264 iscola = *col; 5265 } 5266 if (scall != MAT_INITIAL_MATRIX) { 5267 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5268 aloc[0] = *A_loc; 5269 } 5270 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5271 if (!col) { /* attach global id of condensed columns */ 5272 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5273 } 5274 *A_loc = aloc[0]; 5275 ierr = PetscFree(aloc);CHKERRQ(ierr); 5276 if (!row) { 5277 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5278 } 5279 if (!col) { 5280 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5281 } 5282 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5283 PetscFunctionReturn(0); 5284 } 5285 5286 /* 5287 * Destroy a mat that may be 
composed with PetscSF communication objects. 5288 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5289 * */ 5290 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5291 { 5292 PetscSF sf,osf; 5293 IS map; 5294 PetscErrorCode ierr; 5295 5296 PetscFunctionBegin; 5297 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5298 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5299 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5300 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5301 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5302 ierr = ISDestroy(&map);CHKERRQ(ierr); 5303 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5304 PetscFunctionReturn(0); 5305 } 5306 5307 /* 5308 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5309 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5310 * on a global size. 5311 * */ 5312 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5313 { 5314 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5315 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5316 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol; 5317 PetscSFNode *iremote,*oiremote; 5318 const PetscInt *lrowindices; 5319 PetscErrorCode ierr; 5320 PetscSF sf,osf; 5321 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5322 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5323 MPI_Comm comm; 5324 ISLocalToGlobalMapping mapping; 5325 5326 PetscFunctionBegin; 5327 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5328 /* plocalsize is the number of roots 5329 * nrows is the number of leaves 5330 * */ 5331 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5332 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5333 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5334 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5335 for (i=0;i<nrows;i++) { 5336 /* Find a remote index and an owner for a row 5337 * The row could be local or remote 5338 * */ 5339 owner = 0; 5340 lidx = 0; 5341 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5342 iremote[i].index = lidx; 5343 iremote[i].rank = owner; 5344 } 5345 /* Create SF to communicate how many nonzero columns for each row */ 5346 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5347 /* SF will figure out the number of nonzero colunms for each row, and their 5348 * offsets 5349 * */ 5350 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5351 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5352 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5353 5354 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5355 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5356 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5357 roffsets[0] = 0; 5358 roffsets[1] = 0; 5359 for (i=0;i<plocalsize;i++) { 5360 /* diag */ 5361 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5362 /* off diag */ 5363 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5364 /* compute offsets so that we relative location for each row */ 5365 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5366 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5367 } 5368 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5369 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5370 /* 'r' means root, and 
'l' means leaf */ 5371 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5372 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5373 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5374 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5375 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5376 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5377 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5378 dntotalcols = 0; 5379 ontotalcols = 0; 5380 ncol = 0; 5381 for (i=0;i<nrows;i++) { 5382 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5383 ncol = PetscMax(pnnz[i],ncol); 5384 /* diag */ 5385 dntotalcols += nlcols[i*2+0]; 5386 /* off diag */ 5387 ontotalcols += nlcols[i*2+1]; 5388 } 5389 /* We do not need to figure the right number of columns 5390 * since all the calculations will be done by going through the raw data 5391 * */ 5392 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5393 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5394 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5395 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5396 /* diag */ 5397 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5398 /* off diag */ 5399 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5400 /* diag */ 5401 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5402 /* off diag */ 5403 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5404 dntotalcols = 0; 5405 ontotalcols = 0; 5406 ntotalcols = 0; 5407 for (i=0;i<nrows;i++) { 5408 owner = 0; 5409 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5410 /* Set iremote for diag matrix */ 5411 for (j=0;j<nlcols[i*2+0];j++) { 5412 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5413 iremote[dntotalcols].rank = owner; 5414 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5415 ilocal[dntotalcols++] = ntotalcols++; 5416 } 5417 /* off diag */ 5418 for (j=0;j<nlcols[i*2+1];j++) { 5419 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5420 oiremote[ontotalcols].rank = owner; 5421 oilocal[ontotalcols++] = ntotalcols++; 5422 } 5423 } 5424 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5425 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5426 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5427 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5428 /* P serves as roots and P_oth is leaves 5429 * Diag matrix 5430 * */ 5431 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5432 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5433 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5434 5435 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5436 /* Off diag */ 5437 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5438 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5439 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5440 /* We operate on the matrix internal data for saving memory */ 5441 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5442 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5443 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5444 /* Convert to global indices for diag matrix */ 5445 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5446 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5447 /* We want P_oth store global indices */ 5448 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5449 /* Use memory scalable approach */ 5450 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5451 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5452 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5453 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5454 /* Convert back to local indices */ 5455 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5456 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5457 nout = 0; 5458 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5459 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5460 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5461 /* Exchange values */ 5462 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5463 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5464 /* Stop PETSc from shrinking memory */ 5465 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5466 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5467 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5468 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5469 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5470 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5471 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5472 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5473 PetscFunctionReturn(0); 5474 } 5475 5476 /* 5477 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5478 * This supports MPIAIJ and MAIJ 5479 * */ 5480 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5481 { 5482 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5483 Mat_SeqAIJ *p_oth; 5484 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5485 IS rows,map; 5486 PetscHMapI hamp; 5487 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5488 MPI_Comm comm; 5489 PetscSF sf,osf; 5490 PetscBool has; 5491 PetscErrorCode ierr; 5492 5493 PetscFunctionBegin; 5494 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5495 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5496 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5497 * and then create a submatrix (that often is an overlapping matrix) 5498 * */ 5499 if (reuse==MAT_INITIAL_MATRIX) { 5500 /* Use a hash table to figure out unique keys */ 5501 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5502 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5503 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5504 count = 0; 5505 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5506 for (i=0;i<a->B->cmap->n;i++) { 5507 key = a->garray[i]/dof; 5508 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5509 if (!has) { 5510 mapping[i] = count; 5511 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5512 } else { 5513 /* Current 'i' has the same value the previous step */ 5514 mapping[i] = count-1; 5515 } 5516 } 5517 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5518 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5519 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5520 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5521 off = 0; 5522 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5523 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5524 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5525 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5526 /* In case, the matrix was already created but users want to recreate the matrix */ 5527 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5528 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5529 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5530 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5531 } else if (reuse==MAT_REUSE_MATRIX) { 5532 /* If matrix was already created, we simply update values using SF objects 5533 * that as attached to the matrix ealier. 5534 * */ 5535 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5536 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5537 if (!sf || !osf) { 5538 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5539 } 5540 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5541 /* Update values in place */ 5542 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5543 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5544 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5545 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5546 } else { 5547 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5548 } 5549 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5550 PetscFunctionReturn(0); 5551 } 5552 5553 /*@C 5554 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5555 5556 Collective on Mat 5557 5558 Input Parameters: 5559 + A,B - the matrices in mpiaij format 5560 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5561 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5562 5563 Output Parameter: 5564 + rowb, colb - index sets of rows and columns of B to extract 5565 - B_seq - the sequential matrix generated 5566 5567 Level: developer 5568 5569 @*/ 5570 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5571 { 5572 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5573 PetscErrorCode ierr; 5574 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5575 IS isrowb,iscolb; 5576 Mat *bseq=NULL; 5577 5578 PetscFunctionBegin; 5579 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5580 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5581 } 5582 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5583 5584 if (scall == MAT_INITIAL_MATRIX) { 5585 start = A->cmap->rstart; 5586 cmap = a->garray; 5587 nzA = a->A->cmap->n; 5588 nzB = a->B->cmap->n; 5589 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5590 ncols = 0; 5591 for (i=0; i<nzB; i++) { /* row < local row index */ 5592 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5593 else break; 5594 } 5595 imark = i; 5596 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5597 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5598 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5599 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5600 } else { 5601 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5602 isrowb = *rowb; iscolb = *colb; 5603 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5604 bseq[0] = *B_seq; 5605 } 5606 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5607 *B_seq = bseq[0]; 5608 ierr = PetscFree(bseq);CHKERRQ(ierr); 5609 if (!rowb) { 5610 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5611 } else { 5612 *rowb = isrowb; 5613 } 5614 if (!colb) { 5615 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5616 } else { 5617 *colb = iscolb; 5618 } 5619 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5620 PetscFunctionReturn(0); 5621 } 5622 5623 /* 5624 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5625 of the OFF-DIAGONAL portion of local A 5626 5627 Collective on Mat 5628 5629 Input Parameters: 5630 + A,B - the matrices in mpiaij format 5631 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5632 5633 Output Parameter: 5634 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5635 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5636 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5637 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5638 5639 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5640 for this matrix. This is not desirable.. 
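
    A reuse sketch (hypothetical caller-side variables; the arrays obtained with MAT_INITIAL_MATRIX
    are passed back unchanged so that only the numerical values are refreshed):
.vb
      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;
      Mat        B_oth     = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      (change the numerical values of A and B, keeping their nonzero structure)
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
.ve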

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter        ctx;
  MPI_Comm          comm;
  const PetscMPIInt *rprocs,*sprocs;
  const PetscInt    *srow,*rstarts,*sstarts;
  PetscInt          *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt          i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
  PetscScalar       *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request       *rwaits = NULL,*swaits = NULL;
  MPI_Status        rstatus;
  PetscMPIInt       jj,size,tag,rank,nsends_mpi,nrecvs_mpi;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (size == 1) {
    if (startsj_s) *startsj_s = NULL;
    if (bufa_ptr)  *bufa_ptr  = NULL;
    *B_oth = NULL;
    PetscFunctionReturn(0);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
  ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
  ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

    Input Parameter:
.   A - the matrix in MPIAIJ format

    Output Parameters:
+   lvec - the local vector holding off-process values from the argument to a matrix-vector product
.   colmap - a map from global column index to local index into lvec
-   multScatter - a scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
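
/*
   Illustrative sketch (not part of the PETSc sources): how a developer might query the
   communication structures of an assembled MPIAIJ matrix A created elsewhere. Error
   handling is abbreviated; lvec is a sequential vector whose length is the number of
   off-process columns this process needs.

     Vec        lvec;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     VecScatter Mvctx;
     PetscInt   nghost;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecGetLocalSize(lvec,&nghost);CHKERRQ(ierr);   // number of off-process columns needed locally
*/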
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/*
    Computes C = A*B as C = (B'*A')', since computing A*B directly with a dense A and sparse B is not supported

               n                p                p
        (           )       (       )        (       )
      m (     A     )  *  n (   B   )  =   m (   C   )
        (           )       (       )        (       )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
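
/*
   Illustrative sketch (not part of the PETSc sources): forming C = A*B through the public
   MatMatMult() interface, which dispatches to the routines above when A is MATMPIDENSE and
   B is MATMPIAIJ. A and B are assumed to be assembled with compatible layouts; error
   handling is abbreviated.

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);   // recompute after the entries of A or B change
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/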
/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
    in which case the values associated with the rows and columns one passes in are set to zero
    in the matrix.

    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In that case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
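
/*
   Illustrative sketch (not part of the PETSc sources): creating a MATMPIAIJ matrix through the
   generic interface and filling its diagonal. The sizes and preallocation numbers below are
   placeholders chosen only for the example; error handling is abbreviated.

     Mat      A;
     PetscInt i,rstart,rend;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValue(A,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);   // each process sets only rows it owns
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/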
/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
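
/*
   Illustrative sketch (not part of the PETSc sources): assembling the 4x4 matrix tridiag(-1,2,-1)
   on exactly two MPI processes with MatCreateMPIAIJWithSplitArrays(). Each process owns two rows
   and two columns; the diagonal-block column indices are local while, following the use of the
   global column count for maij->B above, the off-diagonal column indices are assumed to be global.
   The arrays are static because the routine does not copy them.

     static PetscInt    di[]  = {0,2,4}, dj[]  = {0,1,0,1};
     static PetscScalar da[]  = {2,-1,-1,2};
     static PetscInt    oi0[] = {0,0,1}, oj0[] = {2};     // rank 0: local row 1 couples to global column 2
     static PetscInt    oi1[] = {0,1,1}, oj1[] = {1};     // rank 1: local row 0 (global row 2) couples to global column 1
     static PetscScalar oa[]  = {-1};
     PetscMPIInt        rank;
     Mat                A;

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                           di,dj,da,rank ? oi1 : oi0,rank ? oj1 : oj0,oa,&A);CHKERRQ(ierr);
*/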
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}