1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/vecscatterimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->pinnedtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = 0; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = 
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 goto a_noinsert; \ 468 } \ 469 } \ 470 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 471 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 472 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 473 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 474 N = nrow1++ - 1; a->nz++; high1++; \ 475 /* shift up all the later entries in this row */ \ 476 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 477 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 478 rp1[_i] = col; \ 479 ap1[_i] = value; \ 480 
A->nonzerostate++;\ 481 a_noinsert: ; \ 482 ailen[row] = nrow1; \ 483 } 484 485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 486 { \ 487 if (col <= lastcol2) low2 = 0; \ 488 else high2 = nrow2; \ 489 lastcol2 = col; \ 490 while (high2-low2 > 5) { \ 491 t = (low2+high2)/2; \ 492 if (rp2[t] > col) high2 = t; \ 493 else low2 = t; \ 494 } \ 495 for (_i=low2; _i<high2; _i++) { \ 496 if (rp2[_i] > col) break; \ 497 if (rp2[_i] == col) { \ 498 if (addv == ADD_VALUES) { \ 499 ap2[_i] += value; \ 500 (void)PetscLogFlops(1.0); \ 501 } \ 502 else ap2[_i] = value; \ 503 goto b_noinsert; \ 504 } \ 505 } \ 506 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 507 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 508 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 509 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 510 N = nrow2++ - 1; b->nz++; high2++; \ 511 /* shift up all the later entries in this row */ \ 512 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 513 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 514 rp2[_i] = col; \ 515 ap2[_i] = value; \ 516 B->nonzerostate++; \ 517 b_noinsert: ; \ 518 bilen[row] = nrow2; \ 519 } 520 521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 522 { 523 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 525 PetscErrorCode ierr; 526 PetscInt l,*garray = mat->garray,diag; 527 528 PetscFunctionBegin; 529 /* code only works for square matrices A */ 530 531 /* find size of row to the left of the diagonal part */ 532 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 533 row = row - diag; 534 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 535 if (garray[b->j[b->i[row]+l]] > diag) break; 536 } 537 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 538 539 /* diagonal part */ 540 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 541 542 /* right of diagonal part */ 543 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 548 { 549 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 550 PetscScalar value = 0.0; 551 PetscErrorCode ierr; 552 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 553 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 554 PetscBool roworiented = aij->roworiented; 555 556 /* Some Variables required in the macro */ 557 Mat A = aij->A; 558 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 559 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 560 MatScalar *aa = a->a; 561 PetscBool ignorezeroentries = a->ignorezeroentries; 562 Mat B = aij->B; 563 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 564 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 565 MatScalar *ba = b->a; 566 567 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 568 PetscInt nonew; 569 MatScalar *ap1,*ap2; 570 571 PetscFunctionBegin; 572 for (i=0; i<m; i++) { 573 if (im[i] < 0) continue; 574 #if defined(PETSC_USE_DEBUG) 575 if (im[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 576 #endif 577 if (im[i] >= rstart && im[i] < rend) { 578 row = im[i] - rstart; 579 lastcol1 = -1; 580 rp1 = aj + ai[row]; 581 ap1 = aa + ai[row]; 582 rmax1 = aimax[row]; 583 nrow1 = ailen[row]; 584 low1 = 0; 585 high1 = nrow1; 586 lastcol2 = -1; 587 rp2 = bj + bi[row]; 588 ap2 = ba + bi[row]; 589 rmax2 = bimax[row]; 590 nrow2 = bilen[row]; 591 low2 = 0; 592 high2 = nrow2; 593 594 for (j=0; j<n; j++) { 595 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 596 if (in[j] >= cstart && in[j] < cend) { 597 col = in[j] - cstart; 598 nonew = a->nonew; 599 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 600 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 601 } else if (in[j] < 0) continue; 602 #if defined(PETSC_USE_DEBUG) 603 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 604 #endif 605 else { 606 if (mat->was_assembled) { 607 if (!aij->colmap) { 608 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 609 } 610 #if defined(PETSC_USE_CTABLE) 611 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 612 col--; 613 #else 614 col = aij->colmap[in[j]] - 1; 615 #endif 616 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 617 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 618 col = in[j]; 619 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 620 B = aij->B; 621 b = (Mat_SeqAIJ*)B->data; 622 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 623 rp2 = bj + bi[row]; 624 ap2 = ba + bi[row]; 625 rmax2 = bimax[row]; 626 nrow2 = bilen[row]; 627 low2 = 0; 628 high2 = nrow2; 629 bm = aij->B->rmap->n; 630 ba = b->a; 631 } else if (col < 0) { 632 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 633 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 634 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 635 } 636 } else col = in[j]; 637 nonew = b->nonew; 638 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 639 } 640 } 641 } else { 642 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 643 if (!aij->donotstash) { 644 mat->assembled = PETSC_FALSE; 645 if (roworiented) { 646 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 647 } else { 648 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 649 } 650 } 651 } 652 } 653 PetscFunctionReturn(0); 654 } 655 656 /* 657 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 658 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 659 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
660 */ 661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 662 { 663 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 664 Mat A = aij->A; /* diagonal part of the matrix */ 665 Mat B = aij->B; /* offdiagonal part of the matrix */ 666 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 667 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 668 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 669 PetscInt *ailen = a->ilen,*aj = a->j; 670 PetscInt *bilen = b->ilen,*bj = b->j; 671 PetscInt am = aij->A->rmap->n,j; 672 PetscInt diag_so_far = 0,dnz; 673 PetscInt offd_so_far = 0,onz; 674 675 PetscFunctionBegin; 676 /* Iterate over all rows of the matrix */ 677 for (j=0; j<am; j++) { 678 dnz = onz = 0; 679 /* Iterate over all non-zero columns of the current row */ 680 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 681 /* If column is in the diagonal */ 682 if (mat_j[col] >= cstart && mat_j[col] < cend) { 683 aj[diag_so_far++] = mat_j[col] - cstart; 684 dnz++; 685 } else { /* off-diagonal entries */ 686 bj[offd_so_far++] = mat_j[col]; 687 onz++; 688 } 689 } 690 ailen[j] = dnz; 691 bilen[j] = onz; 692 } 693 PetscFunctionReturn(0); 694 } 695 696 /* 697 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 698 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 699 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 700 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 701 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 702 */ 703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 704 { 705 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 706 Mat A = aij->A; /* diagonal part of the matrix */ 707 Mat B = aij->B; /* offdiagonal part of the matrix */ 708 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 710 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 711 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 712 PetscInt *ailen = a->ilen,*aj = a->j; 713 PetscInt *bilen = b->ilen,*bj = b->j; 714 PetscInt am = aij->A->rmap->n,j; 715 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 716 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 717 PetscScalar *aa = a->a,*ba = b->a; 718 719 PetscFunctionBegin; 720 /* Iterate over all rows of the matrix */ 721 for (j=0; j<am; j++) { 722 dnz_row = onz_row = 0; 723 rowstart_offd = full_offd_i[j]; 724 rowstart_diag = full_diag_i[j]; 725 /* Iterate over all non-zero columns of the current row */ 726 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 727 /* If column is in the diagonal */ 728 if (mat_j[col] >= cstart && mat_j[col] < cend) { 729 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 730 aa[rowstart_diag+dnz_row] = mat_a[col]; 731 dnz_row++; 732 } else { /* off-diagonal entries */ 733 bj[rowstart_offd+onz_row] = mat_j[col]; 734 ba[rowstart_offd+onz_row] = mat_a[col]; 735 onz_row++; 736 } 737 } 738 ailen[j] = dnz_row; 739 bilen[j] = onz_row; 740 } 741 PetscFunctionReturn(0); 742 } 743 744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 749 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 750 751 PetscFunctionBegin; 752 for (i=0; i<m; i++) { 753 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 754 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 755 if (idxm[i] >= rstart && idxm[i] < rend) { 756 row = idxm[i] - rstart; 757 for (j=0; j<n; j++) { 758 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 759 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 760 if (idxn[j] >= cstart && idxn[j] < cend) { 761 col = idxn[j] - cstart; 762 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 763 } else { 764 if (!aij->colmap) { 765 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 766 } 767 #if defined(PETSC_USE_CTABLE) 768 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 769 col--; 770 #else 771 col = aij->colmap[idxn[j]] - 1; 772 #endif 773 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 774 else { 775 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776 } 777 } 778 } 779 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 785 786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 787 { 788 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 789 PetscErrorCode ierr; 790 PetscInt nstash,reallocs; 791 792 PetscFunctionBegin; 793 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 794 795 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 796 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 797 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 805 PetscErrorCode ierr; 806 PetscMPIInt n; 807 PetscInt i,j,rstart,ncols,flg; 808 PetscInt *row,*col; 809 
PetscBool other_disassembled; 810 PetscScalar *val; 811 812 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 813 814 PetscFunctionBegin; 815 if (!aij->donotstash && !mat->nooffprocentries) { 816 while (1) { 817 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 818 if (!flg) break; 819 820 for (i=0; i<n; ) { 821 /* Now identify the consecutive vals belonging to the same row */ 822 for (j=i,rstart=row[j]; j<n; j++) { 823 if (row[j] != rstart) break; 824 } 825 if (j < n) ncols = j-i; 826 else ncols = n-i; 827 /* Now assemble all these values with a single function call */ 828 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 829 830 i = j; 831 } 832 } 833 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 834 } 835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 836 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 837 #endif 838 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 839 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 840 841 /* determine if any processor has disassembled, if so we must 842 also disassemble ourself, in order that we may reassemble. */ 843 /* 844 if nonzero structure of submatrix B cannot change then we know that 845 no processor disassembled thus we can skip this stuff 846 */ 847 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 848 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 849 if (mat->was_assembled && !other_disassembled) { 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 852 #endif 853 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 854 } 855 } 856 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 857 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 858 } 859 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 861 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 862 #endif 863 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 864 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 865 866 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 867 868 aij->rowvalues = 0; 869 870 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 871 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 872 873 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 874 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 875 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 876 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 877 } 878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 879 mat->offloadmask = PETSC_OFFLOAD_BOTH; 880 #endif 881 PetscFunctionReturn(0); 882 } 883 884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 885 { 886 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 891 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatZeroRows_MPIAIJ(Mat 
A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 896 { 897 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 898 PetscObjectState sA, sB; 899 PetscInt *lrows; 900 PetscInt r, len; 901 PetscBool cong, lch, gch; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 /* get locally owned rows */ 906 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 907 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 908 /* fix right hand side if needed */ 909 if (x && b) { 910 const PetscScalar *xx; 911 PetscScalar *bb; 912 913 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 914 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 916 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 917 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 918 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 919 } 920 921 sA = mat->A->nonzerostate; 922 sB = mat->B->nonzerostate; 923 924 if (diag != 0.0 && cong) { 925 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 926 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 927 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 928 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 929 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 930 PetscInt nnwA, nnwB; 931 PetscBool nnzA, nnzB; 932 933 nnwA = aijA->nonew; 934 nnwB = aijB->nonew; 935 nnzA = aijA->keepnonzeropattern; 936 nnzB = aijB->keepnonzeropattern; 937 if (!nnzA) { 938 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 939 aijA->nonew = 0; 940 } 941 if (!nnzB) { 942 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 943 aijB->nonew = 0; 944 } 945 /* Must zero here before the next loop */ 946 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 947 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 948 for (r = 0; r < len; ++r) { 949 const PetscInt row = lrows[r] + A->rmap->rstart; 950 if (row >= A->cmap->N) continue; 951 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 952 } 953 aijA->nonew = nnwA; 954 aijB->nonew = nnwB; 955 } else { 956 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 957 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 958 } 959 ierr = PetscFree(lrows);CHKERRQ(ierr); 960 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 961 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 963 /* reduce nonzerostate */ 964 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 965 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 966 if (gch) A->nonzerostate++; 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 971 { 972 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscMPIInt n = A->rmap->n; 975 PetscInt i,j,r,m,p = 0,len = 0; 976 PetscInt *lrows,*owners = A->rmap->range; 977 PetscSFNode *rrows; 978 PetscSF sf; 979 const PetscScalar *xx; 980 PetscScalar *bb,*mask; 981 Vec xmask,lmask; 982 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 983 const PetscInt 
*aj, *ii,*ridx; 984 PetscScalar *aa; 985 986 PetscFunctionBegin; 987 /* Create SF where leaves are input rows and roots are owned rows */ 988 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 989 for (r = 0; r < n; ++r) lrows[r] = -1; 990 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 991 for (r = 0; r < N; ++r) { 992 const PetscInt idx = rows[r]; 993 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 994 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 995 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 996 } 997 rrows[r].rank = p; 998 rrows[r].index = rows[r] - owners[p]; 999 } 1000 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1001 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1002 /* Collect flags for rows to be zeroed */ 1003 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1004 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1005 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1006 /* Compress and put in row numbers */ 1007 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1008 /* zero diagonal part of matrix */ 1009 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1010 /* handle off diagonal part of matrix */ 1011 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1012 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1013 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1014 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1015 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1016 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1017 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1018 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1019 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1020 PetscBool cong; 1021 1022 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1023 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1024 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1027 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1028 } 1029 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1030 /* remove zeroed rows of off diagonal matrix */ 1031 ii = aij->i; 1032 for (i=0; i<len; i++) { 1033 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1034 } 1035 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1036 if (aij->compressedrow.use) { 1037 m = aij->compressedrow.nrows; 1038 ii = aij->compressedrow.i; 1039 ridx = aij->compressedrow.rindex; 1040 for (i=0; i<m; i++) { 1041 n = ii[i+1] - ii[i]; 1042 aj = aij->j + ii[i]; 1043 aa = aij->a + ii[i]; 1044 1045 for (j=0; j<n; j++) { 1046 if (PetscAbsScalar(mask[*aj])) { 1047 if (b) bb[*ridx] -= *aa*xx[*aj]; 1048 *aa = 0.0; 1049 } 1050 aa++; 1051 aj++; 1052 } 1053 ridx++; 1054 } 1055 } else { /* do not use compressed row format */ 1056 m = l->B->rmap->n; 1057 for (i=0; i<m; i++) { 1058 n = ii[i+1] - ii[i]; 1059 aj = aij->j + ii[i]; 1060 aa = aij->a + ii[i]; 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[i] -= 
*aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 } 1070 } 1071 if (x && b) { 1072 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1073 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1074 } 1075 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1076 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1077 ierr = PetscFree(lrows);CHKERRQ(ierr); 1078 1079 /* only change matrix nonzero state if pattern was allowed to be changed */ 1080 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1081 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1082 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1088 { 1089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt nt; 1092 VecScatter Mvctx = a->Mvctx; 1093 1094 PetscFunctionBegin; 1095 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1096 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1097 1098 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1099 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1100 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1101 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1116 { 1117 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1118 PetscErrorCode ierr; 1119 VecScatter Mvctx = a->Mvctx; 1120 1121 PetscFunctionBegin; 1122 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1123 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1124 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1125 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1126 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 1135 PetscFunctionBegin; 1136 /* do nondiagonal part */ 1137 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* add partial results together */ 1141 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1147 { 1148 MPI_Comm comm; 1149 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1150 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1151 IS Me,Notme; 1152 PetscErrorCode ierr; 1153 PetscInt M,N,first,last,*notme,i; 1154 PetscBool lf; 1155 PetscMPIInt size; 1156 1157 PetscFunctionBegin; 1158 /* Easy test: symmetric diagonal block */ 1159 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1160 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 
1161 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1162 if (!*f) PetscFunctionReturn(0); 1163 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1164 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1165 if (size == 1) PetscFunctionReturn(0); 1166 1167 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1168 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1169 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1171 for (i=0; i<first; i++) notme[i] = i; 1172 for (i=last; i<M; i++) notme[i-last+first] = i; 1173 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1174 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1175 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1176 Aoff = Aoffs[0]; 1177 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1178 Boff = Boffs[0]; 1179 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1180 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1181 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1182 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1183 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1184 ierr = PetscFree(notme);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1189 { 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1198 { 1199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 /* do nondiagonal part */ 1204 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1205 /* do local part */ 1206 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1207 /* add partial results together */ 1208 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1218 { 1219 PetscErrorCode ierr; 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 1222 PetscFunctionBegin; 1223 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1224 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1225 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1230 { 1231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1236 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1241 { 1242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1243 PetscErrorCode ierr; 1244 1245 PetscFunctionBegin; 1246 #if defined(PETSC_USE_LOG) 1247 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 
1248 #endif 1249 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1250 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1251 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1252 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1253 #if defined(PETSC_USE_CTABLE) 1254 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1255 #else 1256 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1257 #endif 1258 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1259 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1260 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1261 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1262 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1263 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1264 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1265 1266 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1276 #if defined(PETSC_HAVE_ELEMENTAL) 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1278 #endif 1279 #if defined(PETSC_HAVE_HYPRE) 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1282 #endif 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1289 { 1290 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1291 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1292 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1293 PetscErrorCode ierr; 1294 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1295 int fd; 1296 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1297 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1298 PetscScalar *column_values; 1299 PetscInt message_count,flowcontrolcount; 1300 FILE *file; 1301 1302 PetscFunctionBegin; 1303 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1304 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1305 nz = A->nz + B->nz; 1306 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1307 if (!rank) { 1308 header[0] = MAT_FILE_CLASSID; 1309 header[1] = mat->rmap->N; 1310 header[2] = mat->cmap->N; 1311 1312 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1313 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1314 /* get largest number of rows any processor has */ 1315 rlen = mat->rmap->n; 1316 range = mat->rmap->range; 1317 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1318 } else { 1319 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1320 rlen = mat->rmap->n; 1321 } 1322 1323 /* load up the local row counts */ 1324 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1325 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1326 1327 /* store the row lengths to the file */ 1328 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1329 if (!rank) { 1330 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1331 for (i=1; i<size; i++) { 1332 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1333 rlen = range[i+1] - range[i]; 1334 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1335 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 } 1337 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1338 } else { 1339 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1340 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1342 } 1343 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1344 1345 /* load up the local column indices */ 1346 nzmax = nz; /* th processor needs space a largest processor needs */ 1347 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1348 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1349 cnt = 0; 1350 for (i=0; i<mat->rmap->n; i++) { 1351 for (j=B->i[i]; j<B->i[i+1]; j++) { 1352 if ((col = garray[B->j[j]]) > cstart) break; 1353 column_indices[cnt++] = col; 1354 } 1355 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1356 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1357 } 1358 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1359 1360 /* store the column indices to the file */ 1361 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1362 if (!rank) { 1363 MPI_Status status; 1364 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1365 for (i=1; i<size; i++) { 1366 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1367 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1368 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1369 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1370 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1371 } 1372 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1373 } else { 1374 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 
1375 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1376 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1377 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1378 } 1379 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1380 1381 /* load up the local column values */ 1382 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1383 cnt = 0; 1384 for (i=0; i<mat->rmap->n; i++) { 1385 for (j=B->i[i]; j<B->i[i+1]; j++) { 1386 if (garray[B->j[j]] > cstart) break; 1387 column_values[cnt++] = B->a[j]; 1388 } 1389 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1390 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1391 } 1392 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1393 1394 /* store the column values to the file */ 1395 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1396 if (!rank) { 1397 MPI_Status status; 1398 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1399 for (i=1; i<size; i++) { 1400 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1401 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1402 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1403 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1404 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1405 } 1406 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1407 } else { 1408 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1409 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1410 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1411 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1412 } 1413 ierr = PetscFree(column_values);CHKERRQ(ierr); 1414 1415 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1416 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #include <petscdraw.h> 1421 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1422 { 1423 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1424 PetscErrorCode ierr; 1425 PetscMPIInt rank = aij->rank,size = aij->size; 1426 PetscBool isdraw,iascii,isbinary; 1427 PetscViewer sviewer; 1428 PetscViewerFormat format; 1429 1430 PetscFunctionBegin; 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1433 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1434 if (iascii) { 1435 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1436 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1437 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1438 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1439 ierr = 
MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1440 for (i=0; i<(PetscInt)size; i++) { 1441 nmax = PetscMax(nmax,nz[i]); 1442 nmin = PetscMin(nmin,nz[i]); 1443 navg += nz[i]; 1444 } 1445 ierr = PetscFree(nz);CHKERRQ(ierr); 1446 navg = navg/size; 1447 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1451 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1452 MatInfo info; 1453 PetscBool inodes; 1454 1455 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1456 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1457 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1458 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1459 if (!inodes) { 1460 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1461 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1462 } else { 1463 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1464 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1465 } 1466 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1468 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1470 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1472 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1473 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1476 PetscInt inodecount,inodelimit,*inodes; 1477 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1478 if (inodes) { 1479 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1480 } else { 1481 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1485 PetscFunctionReturn(0); 1486 } 1487 } else if (isbinary) { 1488 if (size == 1) { 1489 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1490 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1491 } else { 1492 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1493 } 1494 PetscFunctionReturn(0); 1495 } else if (iascii && size == 1) { 1496 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (isdraw) { 1500 PetscDraw draw; 1501 PetscBool isnull; 1502 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1503 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1504 if (isnull) PetscFunctionReturn(0); 1505 } 1506 1507 { /* assemble the entire 
matrix onto first processor */ 1508 Mat A = NULL, Av; 1509 IS isrow,iscol; 1510 1511 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1512 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1513 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1514 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1515 /* The commented code uses MatCreateSubMatrices instead */ 1516 /* 1517 Mat *AA, A = NULL, Av; 1518 IS isrow,iscol; 1519 1520 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1521 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1522 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1523 if (!rank) { 1524 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1525 A = AA[0]; 1526 Av = AA[0]; 1527 } 1528 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1529 */ 1530 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1531 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1532 /* 1533 Everyone has to call to draw the matrix since the graphics waits are 1534 synchronized across all processors that share the PetscDraw object 1535 */ 1536 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1537 if (!rank) { 1538 if (((PetscObject)mat)->name) { 1539 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1540 } 1541 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1542 } 1543 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1544 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1545 ierr = MatDestroy(&A);CHKERRQ(ierr); 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1551 { 1552 PetscErrorCode ierr; 1553 PetscBool iascii,isdraw,issocket,isbinary; 1554 1555 PetscFunctionBegin; 1556 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1557 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1558 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1559 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1560 if (iascii || isdraw || isbinary || issocket) { 1561 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1567 { 1568 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1569 PetscErrorCode ierr; 1570 Vec bb1 = 0; 1571 PetscBool hasop; 1572 1573 PetscFunctionBegin; 1574 if (flag == SOR_APPLY_UPPER) { 1575 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1580 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1581 } 1582 1583 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1584 if (flag & SOR_ZERO_INITIAL_GUESS) { 1585 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1586 its--; 1587 } 1588 1589 while (its--) { 1590 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 ierr = 
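      /* complete the scatter of the current iterate into mat->lvec; each sweep below is purely
         local: the off-diagonal block contribution B*x is folded into the right-hand side bb1
         and the SOR sweep is then applied only to the diagonal block mat->A */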
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 1593 /* update rhs: bb1 = bb - B*x */ 1594 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1595 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1596 1597 /* local sweep */ 1598 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1599 } 1600 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1601 if (flag & SOR_ZERO_INITIAL_GUESS) { 1602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1603 its--; 1604 } 1605 while (its--) { 1606 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 1609 /* update rhs: bb1 = bb - B*x */ 1610 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1611 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1612 1613 /* local sweep */ 1614 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1615 } 1616 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1617 if (flag & SOR_ZERO_INITIAL_GUESS) { 1618 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1619 its--; 1620 } 1621 while (its--) { 1622 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 1625 /* update rhs: bb1 = bb - B*x */ 1626 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1627 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1628 1629 /* local sweep */ 1630 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1631 } 1632 } else if (flag & SOR_EISENSTAT) { 1633 Vec xx1; 1634 1635 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1637 1638 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1639 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 if (!mat->diag) { 1641 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1642 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1643 } 1644 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1645 if (hasop) { 1646 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1647 } else { 1648 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1649 } 1650 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1651 1652 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1653 1654 /* local sweep */ 1655 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1656 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1657 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1658 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1659 1660 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1661 1662 matin->factorerrortype = mat->A->factorerrortype; 1663 PetscFunctionReturn(0); 1664 } 1665 1666 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1667 { 1668 Mat aA,aB,Aperm; 1669 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1670 PetscScalar *aa,*ba; 1671 PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1672 PetscSF rowsf,sf; 1673 IS parcolp = NULL; 1674 PetscBool done; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1679 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1680 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1681 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1682 1683 /* Invert row permutation to find out where my rows should go */ 1684 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1685 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1686 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1687 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1688 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1689 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1690 1691 /* Invert column permutation to find out where my columns should go */ 1692 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1693 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1694 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1695 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1696 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1697 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1698 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1699 1700 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1701 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1702 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1703 1704 /* Find out where my gcols should go */ 1705 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1706 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1707 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1708 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1709 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1710 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1711 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1712 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1713 1714 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1715 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1716 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1717 for (i=0; i<m; i++) { 1718 PetscInt row = rdest[i],rowner; 1719 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1720 for (j=ai[i]; j<ai[i+1]; j++) { 1721 PetscInt cowner,col = cdest[aj[j]]; 1722 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1723 if (rowner == cowner) dnnz[i]++; 1724 else onnz[i]++; 1725 } 1726 for (j=bi[i]; j<bi[i+1]; j++) { 1727 PetscInt cowner,col = gcdest[bj[j]]; 1728 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1729 if (rowner == cowner) dnnz[i]++; 1730 else onnz[i]++; 1731 } 1732 } 1733 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1734 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1735 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1736 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1737 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1738 1739 ierr = 
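  /* create the permuted matrix, preallocated with the diagonal/off-diagonal row counts that
     were just broadcast to the processes owning the permuted rows */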
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1740 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1741 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1742 for (i=0; i<m; i++) { 1743 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1744 PetscInt j0,rowlen; 1745 rowlen = ai[i+1] - ai[i]; 1746 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1747 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1748 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1749 } 1750 rowlen = bi[i+1] - bi[i]; 1751 for (j0=j=0; j<rowlen; j0=j) { 1752 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1753 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1754 } 1755 } 1756 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1757 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1758 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1759 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1760 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1761 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1762 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1763 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1764 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1765 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1766 *B = Aperm; 1767 PetscFunctionReturn(0); 1768 } 1769 1770 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1771 { 1772 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1777 if (ghosts) *ghosts = aij->garray; 1778 PetscFunctionReturn(0); 1779 } 1780 1781 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1782 { 1783 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1784 Mat A = mat->A,B = mat->B; 1785 PetscErrorCode ierr; 1786 PetscLogDouble isend[5],irecv[5]; 1787 1788 PetscFunctionBegin; 1789 info->block_size = 1.0; 1790 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1791 1792 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1793 isend[3] = info->memory; isend[4] = info->mallocs; 1794 1795 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1796 1797 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1798 isend[3] += info->memory; isend[4] += info->mallocs; 1799 if (flag == MAT_LOCAL) { 1800 info->nz_used = isend[0]; 1801 info->nz_allocated = isend[1]; 1802 info->nz_unneeded = isend[2]; 1803 info->memory = isend[3]; 1804 info->mallocs = isend[4]; 1805 } else if (flag == MAT_GLOBAL_MAX) { 1806 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1807 1808 info->nz_used = irecv[0]; 1809 info->nz_allocated = irecv[1]; 1810 info->nz_unneeded = irecv[2]; 1811 info->memory = irecv[3]; 1812 info->mallocs = irecv[4]; 1813 } else if (flag == MAT_GLOBAL_SUM) { 1814 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1815 1816 info->nz_used = irecv[0]; 1817 info->nz_allocated = irecv[1]; 1818 info->nz_unneeded = irecv[2]; 1819 info->memory = irecv[3]; 1820 
info->mallocs = irecv[4]; 1821 } 1822 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1823 info->fill_ratio_needed = 0; 1824 info->factor_mallocs = 0; 1825 PetscFunctionReturn(0); 1826 } 1827 1828 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1829 { 1830 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1831 PetscErrorCode ierr; 1832 1833 PetscFunctionBegin; 1834 switch (op) { 1835 case MAT_NEW_NONZERO_LOCATIONS: 1836 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1837 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1838 case MAT_KEEP_NONZERO_PATTERN: 1839 case MAT_NEW_NONZERO_LOCATION_ERR: 1840 case MAT_USE_INODES: 1841 case MAT_IGNORE_ZERO_ENTRIES: 1842 MatCheckPreallocated(A,1); 1843 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1844 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1845 break; 1846 case MAT_ROW_ORIENTED: 1847 MatCheckPreallocated(A,1); 1848 a->roworiented = flg; 1849 1850 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1851 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1852 break; 1853 case MAT_NEW_DIAGONALS: 1854 case MAT_SORTED_FULL: 1855 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1856 break; 1857 case MAT_IGNORE_OFF_PROC_ENTRIES: 1858 a->donotstash = flg; 1859 break; 1860 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1861 case MAT_SPD: 1862 case MAT_SYMMETRIC: 1863 case MAT_STRUCTURALLY_SYMMETRIC: 1864 case MAT_HERMITIAN: 1865 case MAT_SYMMETRY_ETERNAL: 1866 break; 1867 case MAT_SUBMAT_SINGLEIS: 1868 A->submat_singleis = flg; 1869 break; 1870 case MAT_STRUCTURE_ONLY: 1871 /* The option is handled directly by MatSetOption() */ 1872 break; 1873 default: 1874 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1875 } 1876 PetscFunctionReturn(0); 1877 } 1878 1879 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1880 { 1881 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1882 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1883 PetscErrorCode ierr; 1884 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1885 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1886 PetscInt *cmap,*idx_p; 1887 1888 PetscFunctionBegin; 1889 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1890 mat->getrowactive = PETSC_TRUE; 1891 1892 if (!mat->rowvalues && (idx || v)) { 1893 /* 1894 allocate enough space to hold information from the longest row. 
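       These rowvalues/rowindices buffers are sized once from the longest local row and then
       reused by every subsequent MatGetRow() call on this matrix.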
1895 */ 1896 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1897 PetscInt max = 1,tmp; 1898 for (i=0; i<matin->rmap->n; i++) { 1899 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1900 if (max < tmp) max = tmp; 1901 } 1902 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1903 } 1904 1905 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1906 lrow = row - rstart; 1907 1908 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1909 if (!v) {pvA = 0; pvB = 0;} 1910 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1911 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1912 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1913 nztot = nzA + nzB; 1914 1915 cmap = mat->garray; 1916 if (v || idx) { 1917 if (nztot) { 1918 /* Sort by increasing column numbers, assuming A and B already sorted */ 1919 PetscInt imark = -1; 1920 if (v) { 1921 *v = v_p = mat->rowvalues; 1922 for (i=0; i<nzB; i++) { 1923 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1924 else break; 1925 } 1926 imark = i; 1927 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1928 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1929 } 1930 if (idx) { 1931 *idx = idx_p = mat->rowindices; 1932 if (imark > -1) { 1933 for (i=0; i<imark; i++) { 1934 idx_p[i] = cmap[cworkB[i]]; 1935 } 1936 } else { 1937 for (i=0; i<nzB; i++) { 1938 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1939 else break; 1940 } 1941 imark = i; 1942 } 1943 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1944 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1945 } 1946 } else { 1947 if (idx) *idx = 0; 1948 if (v) *v = 0; 1949 } 1950 } 1951 *nz = nztot; 1952 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1953 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1954 PetscFunctionReturn(0); 1955 } 1956 1957 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1958 { 1959 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1960 1961 PetscFunctionBegin; 1962 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1963 aij->getrowactive = PETSC_FALSE; 1964 PetscFunctionReturn(0); 1965 } 1966 1967 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1968 { 1969 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1970 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1971 PetscErrorCode ierr; 1972 PetscInt i,j,cstart = mat->cmap->rstart; 1973 PetscReal sum = 0.0; 1974 MatScalar *v; 1975 1976 PetscFunctionBegin; 1977 if (aij->size == 1) { 1978 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1979 } else { 1980 if (type == NORM_FROBENIUS) { 1981 v = amat->a; 1982 for (i=0; i<amat->nz; i++) { 1983 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1984 } 1985 v = bmat->a; 1986 for (i=0; i<bmat->nz; i++) { 1987 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1988 } 1989 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1990 *norm = PetscSqrtReal(*norm); 1991 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1992 } else if (type == NORM_1) { /* max column norm */ 1993 PetscReal *tmp,*tmp2; 1994 PetscInt *jj,*garray = aij->garray; 1995 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1996 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1997 *norm = 0.0; 
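      /* the 1-norm is the maximum column sum: accumulate |a_ij| per global column (diagonal-block
         columns shifted by cstart, off-diagonal columns mapped through garray), add the partial
         column sums across all ranks, then take the largest entry */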
1998 v = amat->a; jj = amat->j; 1999 for (j=0; j<amat->nz; j++) { 2000 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2001 } 2002 v = bmat->a; jj = bmat->j; 2003 for (j=0; j<bmat->nz; j++) { 2004 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2005 } 2006 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2007 for (j=0; j<mat->cmap->N; j++) { 2008 if (tmp2[j] > *norm) *norm = tmp2[j]; 2009 } 2010 ierr = PetscFree(tmp);CHKERRQ(ierr); 2011 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2012 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2013 } else if (type == NORM_INFINITY) { /* max row norm */ 2014 PetscReal ntemp = 0.0; 2015 for (j=0; j<aij->A->rmap->n; j++) { 2016 v = amat->a + amat->i[j]; 2017 sum = 0.0; 2018 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2019 sum += PetscAbsScalar(*v); v++; 2020 } 2021 v = bmat->a + bmat->i[j]; 2022 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2023 sum += PetscAbsScalar(*v); v++; 2024 } 2025 if (sum > ntemp) ntemp = sum; 2026 } 2027 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2028 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2029 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2030 } 2031 PetscFunctionReturn(0); 2032 } 2033 2034 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2035 { 2036 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2037 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2038 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2039 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2040 PetscErrorCode ierr; 2041 Mat B,A_diag,*B_diag; 2042 const MatScalar *array; 2043 2044 PetscFunctionBegin; 2045 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2046 ai = Aloc->i; aj = Aloc->j; 2047 bi = Bloc->i; bj = Bloc->j; 2048 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2049 PetscInt *d_nnz,*g_nnz,*o_nnz; 2050 PetscSFNode *oloc; 2051 PETSC_UNUSED PetscSF sf; 2052 2053 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2054 /* compute d_nnz for preallocation */ 2055 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2056 for (i=0; i<ai[ma]; i++) { 2057 d_nnz[aj[i]]++; 2058 } 2059 /* compute local off-diagonal contributions */ 2060 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2061 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2062 /* map those to global */ 2063 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2064 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2065 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2066 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2067 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2068 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2069 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2070 2071 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2072 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2073 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2074 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2075 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2076 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2077 } else { 2078 B = *matout; 2079 ierr = 
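    /* reusing the caller-supplied matrix: forbid new nonzero allocations so that a mismatched
       nonzero pattern is reported as an error rather than silently growing B */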
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2080 } 2081 2082 b = (Mat_MPIAIJ*)B->data; 2083 A_diag = a->A; 2084 B_diag = &b->A; 2085 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2086 A_diag_ncol = A_diag->cmap->N; 2087 B_diag_ilen = sub_B_diag->ilen; 2088 B_diag_i = sub_B_diag->i; 2089 2090 /* Set ilen for diagonal of B */ 2091 for (i=0; i<A_diag_ncol; i++) { 2092 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2093 } 2094 2095 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2096 very quickly (=without using MatSetValues), because all writes are local. */ 2097 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2098 2099 /* copy over the B part */ 2100 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2101 array = Bloc->a; 2102 row = A->rmap->rstart; 2103 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2104 cols_tmp = cols; 2105 for (i=0; i<mb; i++) { 2106 ncol = bi[i+1]-bi[i]; 2107 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2108 row++; 2109 array += ncol; cols_tmp += ncol; 2110 } 2111 ierr = PetscFree(cols);CHKERRQ(ierr); 2112 2113 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2115 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2116 *matout = B; 2117 } else { 2118 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2124 { 2125 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2126 Mat a = aij->A,b = aij->B; 2127 PetscErrorCode ierr; 2128 PetscInt s1,s2,s3; 2129 2130 PetscFunctionBegin; 2131 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2132 if (rr) { 2133 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2134 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2135 /* Overlap communication with computation. 
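       The scatter of rr into aij->lvec is only started here; the left scaling of the
       off-diagonal block and the scaling of the diagonal block proceed while it is in flight,
       and the scatter is completed just before the right scaling of aij->B below.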
*/ 2136 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2137 } 2138 if (ll) { 2139 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2140 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2141 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2142 } 2143 /* scale the diagonal block */ 2144 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2145 2146 if (rr) { 2147 /* Do a scatter end and then right scale the off-diagonal block */ 2148 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2149 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2150 } 2151 PetscFunctionReturn(0); 2152 } 2153 2154 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2155 { 2156 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2157 PetscErrorCode ierr; 2158 2159 PetscFunctionBegin; 2160 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2165 { 2166 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2167 Mat a,b,c,d; 2168 PetscBool flg; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 a = matA->A; b = matA->B; 2173 c = matB->A; d = matB->B; 2174 2175 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2176 if (flg) { 2177 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2178 } 2179 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2180 PetscFunctionReturn(0); 2181 } 2182 2183 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2184 { 2185 PetscErrorCode ierr; 2186 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2187 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2188 2189 PetscFunctionBegin; 2190 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2191 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2192 /* because of the column compression in the off-processor part of the matrix a->B, 2193 the number of columns in a->B and b->B may be different, hence we cannot call 2194 the MatCopy() directly on the two parts. If need be, we can provide a more 2195 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2196 then copying the submatrices */ 2197 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2198 } else { 2199 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2200 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2201 } 2202 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2207 { 2208 PetscErrorCode ierr; 2209 2210 PetscFunctionBegin; 2211 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 /* 2216 Computes the number of nonzeros per row needed for preallocation when X and Y 2217 have different nonzero structure. 
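   Row by row it merges the two sorted column lists of X and Y after mapping their local
   column indices to global indices through xltog/yltog; nnz[i] is the size of the union.
   For example, if row i of X has global columns {1,4,7} and row i of Y has {2,4,9}, the
   union is {1,2,4,7,9} and nnz[i] = 5.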
2218 */ 2219 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2220 { 2221 PetscInt i,j,k,nzx,nzy; 2222 2223 PetscFunctionBegin; 2224 /* Set the number of nonzeros in the new matrix */ 2225 for (i=0; i<m; i++) { 2226 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2227 nzx = xi[i+1] - xi[i]; 2228 nzy = yi[i+1] - yi[i]; 2229 nnz[i] = 0; 2230 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2231 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2232 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2233 nnz[i]++; 2234 } 2235 for (; k<nzy; k++) nnz[i]++; 2236 } 2237 PetscFunctionReturn(0); 2238 } 2239 2240 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2241 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2242 { 2243 PetscErrorCode ierr; 2244 PetscInt m = Y->rmap->N; 2245 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2246 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2247 2248 PetscFunctionBegin; 2249 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2254 { 2255 PetscErrorCode ierr; 2256 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2257 PetscBLASInt bnz,one=1; 2258 Mat_SeqAIJ *x,*y; 2259 2260 PetscFunctionBegin; 2261 if (str == SAME_NONZERO_PATTERN) { 2262 PetscScalar alpha = a; 2263 x = (Mat_SeqAIJ*)xx->A->data; 2264 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2265 y = (Mat_SeqAIJ*)yy->A->data; 2266 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2267 x = (Mat_SeqAIJ*)xx->B->data; 2268 y = (Mat_SeqAIJ*)yy->B->data; 2269 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2270 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2271 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2272 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2273 will be updated */ 2274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2275 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2276 Y->offloadmask = PETSC_OFFLOAD_CPU; 2277 } 2278 #endif 2279 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2280 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2281 } else { 2282 Mat B; 2283 PetscInt *nnz_d,*nnz_o; 2284 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2285 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2286 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2287 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2288 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2289 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2290 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2291 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2292 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2293 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2294 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2295 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2296 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
2297 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2298 } 2299 PetscFunctionReturn(0); 2300 } 2301 2302 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2303 2304 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2305 { 2306 #if defined(PETSC_USE_COMPLEX) 2307 PetscErrorCode ierr; 2308 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2309 2310 PetscFunctionBegin; 2311 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2312 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2313 #else 2314 PetscFunctionBegin; 2315 #endif 2316 PetscFunctionReturn(0); 2317 } 2318 2319 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2320 { 2321 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2322 PetscErrorCode ierr; 2323 2324 PetscFunctionBegin; 2325 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2326 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2327 PetscFunctionReturn(0); 2328 } 2329 2330 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2331 { 2332 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2337 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2342 { 2343 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2344 PetscErrorCode ierr; 2345 PetscInt i,*idxb = 0; 2346 PetscScalar *va,*vb; 2347 Vec vtmp; 2348 2349 PetscFunctionBegin; 2350 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2351 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2352 if (idx) { 2353 for (i=0; i<A->rmap->n; i++) { 2354 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2355 } 2356 } 2357 2358 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2359 if (idx) { 2360 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2361 } 2362 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2363 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2364 2365 for (i=0; i<A->rmap->n; i++) { 2366 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2367 va[i] = vb[i]; 2368 if (idx) idx[i] = a->garray[idxb[i]]; 2369 } 2370 } 2371 2372 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2373 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2374 ierr = PetscFree(idxb);CHKERRQ(ierr); 2375 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2380 { 2381 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2382 PetscErrorCode ierr; 2383 PetscInt i,*idxb = 0; 2384 PetscScalar *va,*vb; 2385 Vec vtmp; 2386 2387 PetscFunctionBegin; 2388 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2389 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2390 if (idx) { 2391 for (i=0; i<A->cmap->n; i++) { 2392 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2393 } 2394 } 2395 2396 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2397 if (idx) { 2398 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2399 } 2400 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2401 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2402 2403 for (i=0; i<A->rmap->n; i++) { 2404 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2405 va[i] = vb[i]; 2406 if (idx) idx[i] = a->garray[idxb[i]]; 2407 } 2408 } 2409 2410 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2411 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2412 ierr = PetscFree(idxb);CHKERRQ(ierr); 2413 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 2417 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2418 { 2419 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2420 PetscInt n = A->rmap->n; 2421 PetscInt cstart = A->cmap->rstart; 2422 PetscInt *cmap = mat->garray; 2423 PetscInt *diagIdx, *offdiagIdx; 2424 Vec diagV, offdiagV; 2425 PetscScalar *a, *diagA, *offdiagA; 2426 PetscInt r; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2431 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2432 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2433 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2434 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2435 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2436 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2437 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2438 for (r = 0; r < n; ++r) { 2439 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2440 a[r] = diagA[r]; 2441 idx[r] = cstart + diagIdx[r]; 2442 } else { 2443 a[r] = offdiagA[r]; 2444 idx[r] = cmap[offdiagIdx[r]]; 2445 } 2446 } 2447 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2449 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2450 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2451 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2452 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2457 { 2458 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2459 PetscInt n = A->rmap->n; 2460 PetscInt cstart = A->cmap->rstart; 2461 PetscInt *cmap = mat->garray; 2462 PetscInt *diagIdx, *offdiagIdx; 2463 Vec diagV, offdiagV; 2464 PetscScalar *a, *diagA, *offdiagA; 2465 PetscInt r; 2466 PetscErrorCode ierr; 2467 2468 PetscFunctionBegin; 2469 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2470 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2471 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2472 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2473 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2474 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2475 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2476 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2477 for (r = 0; r < n; ++r) { 2478 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2479 a[r] = diagA[r]; 2480 idx[r] = cstart + diagIdx[r]; 2481 } else { 2482 a[r] = offdiagA[r]; 2483 idx[r] = cmap[offdiagIdx[r]]; 2484 } 2485 } 2486 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2487 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2488 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2489 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2490 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2491 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } 2494 2495 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2496 { 2497 PetscErrorCode ierr; 2498 Mat *dummy; 2499 2500 PetscFunctionBegin; 2501 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2502 *newmat = *dummy; 2503 ierr = PetscFree(dummy);CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2508 { 2509 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2510 PetscErrorCode ierr; 2511 2512 PetscFunctionBegin; 2513 ierr = 
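  /* the (point-)block diagonal lies entirely within the locally owned diagonal block, so the
     inversion is delegated to the sequential matrix a->A */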
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2514 A->factorerrortype = a->A->factorerrortype; 2515 PetscFunctionReturn(0); 2516 } 2517 2518 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2519 { 2520 PetscErrorCode ierr; 2521 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2522 2523 PetscFunctionBegin; 2524 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2525 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2526 if (x->assembled) { 2527 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2528 } else { 2529 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2530 } 2531 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2532 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2537 { 2538 PetscFunctionBegin; 2539 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2540 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2541 PetscFunctionReturn(0); 2542 } 2543 2544 /*@ 2545 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2546 2547 Collective on Mat 2548 2549 Input Parameters: 2550 + A - the matrix 2551 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2552 2553 Level: advanced 2554 2555 @*/ 2556 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2557 { 2558 PetscErrorCode ierr; 2559 2560 PetscFunctionBegin; 2561 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2566 { 2567 PetscErrorCode ierr; 2568 PetscBool sc = PETSC_FALSE,flg; 2569 2570 PetscFunctionBegin; 2571 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2572 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2573 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2574 if (flg) { 2575 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2576 } 2577 ierr = PetscOptionsTail();CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2582 { 2583 PetscErrorCode ierr; 2584 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2585 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2586 2587 PetscFunctionBegin; 2588 if (!Y->preallocated) { 2589 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2590 } else if (!aij->nz) { 2591 PetscInt nonew = aij->nonew; 2592 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2593 aij->nonew = nonew; 2594 } 2595 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2596 PetscFunctionReturn(0); 2597 } 2598 2599 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2600 { 2601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2602 PetscErrorCode ierr; 2603 2604 PetscFunctionBegin; 2605 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2606 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2607 if (d) { 2608 PetscInt rstart; 
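    /* the sequential diagonal block reports the first missing diagonal as a local row index;
       shift it by this process's ownership start to return a global row number */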
2609 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2610 *d += rstart; 2611 2612 } 2613 PetscFunctionReturn(0); 2614 } 2615 2616 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2617 { 2618 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2619 PetscErrorCode ierr; 2620 2621 PetscFunctionBegin; 2622 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2623 PetscFunctionReturn(0); 2624 } 2625 2626 /* -------------------------------------------------------------------*/ 2627 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2628 MatGetRow_MPIAIJ, 2629 MatRestoreRow_MPIAIJ, 2630 MatMult_MPIAIJ, 2631 /* 4*/ MatMultAdd_MPIAIJ, 2632 MatMultTranspose_MPIAIJ, 2633 MatMultTransposeAdd_MPIAIJ, 2634 0, 2635 0, 2636 0, 2637 /*10*/ 0, 2638 0, 2639 0, 2640 MatSOR_MPIAIJ, 2641 MatTranspose_MPIAIJ, 2642 /*15*/ MatGetInfo_MPIAIJ, 2643 MatEqual_MPIAIJ, 2644 MatGetDiagonal_MPIAIJ, 2645 MatDiagonalScale_MPIAIJ, 2646 MatNorm_MPIAIJ, 2647 /*20*/ MatAssemblyBegin_MPIAIJ, 2648 MatAssemblyEnd_MPIAIJ, 2649 MatSetOption_MPIAIJ, 2650 MatZeroEntries_MPIAIJ, 2651 /*24*/ MatZeroRows_MPIAIJ, 2652 0, 2653 0, 2654 0, 2655 0, 2656 /*29*/ MatSetUp_MPIAIJ, 2657 0, 2658 0, 2659 MatGetDiagonalBlock_MPIAIJ, 2660 0, 2661 /*34*/ MatDuplicate_MPIAIJ, 2662 0, 2663 0, 2664 0, 2665 0, 2666 /*39*/ MatAXPY_MPIAIJ, 2667 MatCreateSubMatrices_MPIAIJ, 2668 MatIncreaseOverlap_MPIAIJ, 2669 MatGetValues_MPIAIJ, 2670 MatCopy_MPIAIJ, 2671 /*44*/ MatGetRowMax_MPIAIJ, 2672 MatScale_MPIAIJ, 2673 MatShift_MPIAIJ, 2674 MatDiagonalSet_MPIAIJ, 2675 MatZeroRowsColumns_MPIAIJ, 2676 /*49*/ MatSetRandom_MPIAIJ, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /*54*/ MatFDColoringCreate_MPIXAIJ, 2682 0, 2683 MatSetUnfactored_MPIAIJ, 2684 MatPermute_MPIAIJ, 2685 0, 2686 /*59*/ MatCreateSubMatrix_MPIAIJ, 2687 MatDestroy_MPIAIJ, 2688 MatView_MPIAIJ, 2689 0, 2690 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2691 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2692 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2693 0, 2694 0, 2695 0, 2696 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2697 MatGetRowMinAbs_MPIAIJ, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /*75*/ MatFDColoringApply_AIJ, 2703 MatSetFromOptions_MPIAIJ, 2704 0, 2705 0, 2706 MatFindZeroDiagonals_MPIAIJ, 2707 /*80*/ 0, 2708 0, 2709 0, 2710 /*83*/ MatLoad_MPIAIJ, 2711 MatIsSymmetric_MPIAIJ, 2712 0, 2713 0, 2714 0, 2715 0, 2716 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2717 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2718 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2719 MatPtAP_MPIAIJ_MPIAIJ, 2720 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2721 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2722 0, 2723 0, 2724 0, 2725 MatPinToCPU_MPIAIJ, 2726 /*99*/ 0, 2727 0, 2728 0, 2729 MatConjugate_MPIAIJ, 2730 0, 2731 /*104*/MatSetValuesRow_MPIAIJ, 2732 MatRealPart_MPIAIJ, 2733 MatImaginaryPart_MPIAIJ, 2734 0, 2735 0, 2736 /*109*/0, 2737 0, 2738 MatGetRowMin_MPIAIJ, 2739 0, 2740 MatMissingDiagonal_MPIAIJ, 2741 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2742 0, 2743 MatGetGhosts_MPIAIJ, 2744 0, 2745 0, 2746 /*119*/0, 2747 0, 2748 0, 2749 0, 2750 MatGetMultiProcBlock_MPIAIJ, 2751 /*124*/MatFindNonzeroRows_MPIAIJ, 2752 MatGetColumnNorms_MPIAIJ, 2753 MatInvertBlockDiagonal_MPIAIJ, 2754 MatInvertVariableBlockDiagonal_MPIAIJ, 2755 MatCreateSubMatricesMPI_MPIAIJ, 2756 /*129*/0, 2757 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2758 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2759 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2760 0, 2761 /*134*/0, 2762 0, 2763 MatRARt_MPIAIJ_MPIAIJ, 2764 0, 2765 0, 2766 
/*139*/MatSetBlockSizes_MPIAIJ, 2767 0, 2768 0, 2769 MatFDColoringSetUp_MPIXAIJ, 2770 MatFindOffBlockDiagonalEntries_MPIAIJ, 2771 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2772 }; 2773 2774 /* ----------------------------------------------------------------------------------------*/ 2775 2776 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2777 { 2778 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2779 PetscErrorCode ierr; 2780 2781 PetscFunctionBegin; 2782 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2783 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2784 PetscFunctionReturn(0); 2785 } 2786 2787 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2788 { 2789 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2790 PetscErrorCode ierr; 2791 2792 PetscFunctionBegin; 2793 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2794 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2799 { 2800 Mat_MPIAIJ *b; 2801 PetscErrorCode ierr; 2802 PetscMPIInt size; 2803 2804 PetscFunctionBegin; 2805 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2806 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2807 b = (Mat_MPIAIJ*)B->data; 2808 2809 #if defined(PETSC_USE_CTABLE) 2810 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2811 #else 2812 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2813 #endif 2814 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2815 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2816 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2817 2818 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2819 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2820 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2821 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2822 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2823 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2824 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2825 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2826 2827 if (!B->preallocated) { 2828 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2829 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2830 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2831 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2832 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2833 } 2834 2835 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2836 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2837 B->preallocated = PETSC_TRUE; 2838 B->was_assembled = PETSC_FALSE; 2839 B->assembled = PETSC_FALSE; 2840 PetscFunctionReturn(0); 2841 } 2842 2843 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2844 { 2845 Mat_MPIAIJ *b; 2846 PetscErrorCode ierr; 2847 2848 PetscFunctionBegin; 2849 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2850 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2851 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2852 b = (Mat_MPIAIJ*)B->data; 2853 2854 #if defined(PETSC_USE_CTABLE) 2855 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2856 #else 2857 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2858 #endif 2859 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2860 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2861 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2862 2863 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2864 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2865 B->preallocated = PETSC_TRUE; 2866 B->was_assembled = PETSC_FALSE; 2867 B->assembled = PETSC_FALSE; 2868 PetscFunctionReturn(0); 2869 } 2870 2871 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2872 { 2873 Mat mat; 2874 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2875 PetscErrorCode ierr; 2876 2877 PetscFunctionBegin; 2878 *newmat = 0; 2879 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2880 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2881 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2882 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2883 a = (Mat_MPIAIJ*)mat->data; 2884 2885 mat->factortype = matin->factortype; 2886 mat->assembled = PETSC_TRUE; 2887 mat->insertmode = NOT_SET_VALUES; 2888 mat->preallocated = PETSC_TRUE; 2889 2890 a->size = oldmat->size; 2891 a->rank = oldmat->rank; 2892 a->donotstash = oldmat->donotstash; 2893 a->roworiented = oldmat->roworiented; 2894 a->rowindices = 0; 2895 a->rowvalues = 0; 2896 a->getrowactive = PETSC_FALSE; 2897 2898 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2899 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2900 2901 if (oldmat->colmap) { 2902 #if defined(PETSC_USE_CTABLE) 2903 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2904 #else 2905 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2906 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2907 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2908 #endif 2909 } else a->colmap = 0; 2910 if (oldmat->garray) { 2911 PetscInt len; 2912 len = oldmat->B->cmap->n; 2913 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2914 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2915 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2916 } else a->garray = 0; 2917 2918 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2919 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2920 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2921 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2922 2923 if (oldmat->Mvctx_mpi1) { 2924 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2925 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2926 } 2927 2928 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2929 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2930 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2931 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2932 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2933 *newmat = mat; 2934 PetscFunctionReturn(0); 2935 } 2936 2937 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2938 { 2939 PetscBool isbinary, ishdf5; 2940 PetscErrorCode ierr; 2941 2942 PetscFunctionBegin; 2943 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2944 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2945 /* force binary viewer to load .info file if it has not yet done so */ 2946 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2947 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2948 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2949 if (isbinary) { 2950 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2951 } else if (ishdf5) { 2952 #if defined(PETSC_HAVE_HDF5) 2953 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2954 #else 2955 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2956 #endif 2957 } else { 2958 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2959 } 2960 PetscFunctionReturn(0); 2961 } 2962 2963 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2964 { 2965 PetscScalar *vals,*svals; 2966 MPI_Comm comm; 2967 PetscErrorCode ierr; 2968 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2969 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2970 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2971 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2972 PetscInt cend,cstart,n,*rowners; 2973 int fd; 2974 PetscInt bs = newMat->rmap->bs; 2975 2976 PetscFunctionBegin; 2977 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2978 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2979 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2980 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2981 if (!rank) { 2982 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2983 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2984 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 
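    /* the four header entries just read are: the MAT_FILE_CLASSID magic number, the global
       number of rows M, the global number of columns N, and the total number of nonzeros
       (a negative nonzero count marks a special on-disk format that cannot be loaded as MATMPIAIJ) */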
2985 } 2986 2987 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2988 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2989 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2990 if (bs < 0) bs = 1; 2991 2992 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2993 M = header[1]; N = header[2]; 2994 2995 /* If global sizes are set, check if they are consistent with that given in the file */ 2996 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2997 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2998 2999 /* determine ownership of all (block) rows */ 3000 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3001 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3002 else m = newMat->rmap->n; /* Set by user */ 3003 3004 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 3005 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3006 3007 /* First process needs enough room for process with most rows */ 3008 if (!rank) { 3009 mmax = rowners[1]; 3010 for (i=2; i<=size; i++) { 3011 mmax = PetscMax(mmax, rowners[i]); 3012 } 3013 } else mmax = -1; /* unused, but compilers complain */ 3014 3015 rowners[0] = 0; 3016 for (i=2; i<=size; i++) { 3017 rowners[i] += rowners[i-1]; 3018 } 3019 rstart = rowners[rank]; 3020 rend = rowners[rank+1]; 3021 3022 /* distribute row lengths to all processors */ 3023 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3024 if (!rank) { 3025 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3026 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3027 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3028 for (j=0; j<m; j++) { 3029 procsnz[0] += ourlens[j]; 3030 } 3031 for (i=1; i<size; i++) { 3032 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3033 /* calculate the number of nonzeros on each processor */ 3034 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3035 procsnz[i] += rowlengths[j]; 3036 } 3037 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3038 } 3039 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3040 } else { 3041 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3042 } 3043 3044 if (!rank) { 3045 /* determine max buffer needed and allocate it */ 3046 maxnz = 0; 3047 for (i=0; i<size; i++) { 3048 maxnz = PetscMax(maxnz,procsnz[i]); 3049 } 3050 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3051 3052 /* read in my part of the matrix column indices */ 3053 nz = procsnz[0]; 3054 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3055 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3056 3057 /* read in every one elses and ship off */ 3058 for (i=1; i<size; i++) { 3059 nz = procsnz[i]; 3060 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3061 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3062 } 3063 ierr = PetscFree(cols);CHKERRQ(ierr); 3064 } else { 3065 /* determine buffer space needed for message */ 3066 nz = 0; 3067 for (i=0; i<m; i++) { 3068 
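      /* accumulate the local row lengths; the total sizes the column-index receive buffer below */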
nz += ourlens[i]; 3069 } 3070 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3071 3072 /* receive message of column indices*/ 3073 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3074 } 3075 3076 /* determine column ownership if matrix is not square */ 3077 if (N != M) { 3078 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3079 else n = newMat->cmap->n; 3080 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3081 cstart = cend - n; 3082 } else { 3083 cstart = rstart; 3084 cend = rend; 3085 n = cend - cstart; 3086 } 3087 3088 /* loop over local rows, determining number of off diagonal entries */ 3089 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3090 jj = 0; 3091 for (i=0; i<m; i++) { 3092 for (j=0; j<ourlens[i]; j++) { 3093 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3094 jj++; 3095 } 3096 } 3097 3098 for (i=0; i<m; i++) { 3099 ourlens[i] -= offlens[i]; 3100 } 3101 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3102 3103 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3104 3105 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3106 3107 for (i=0; i<m; i++) { 3108 ourlens[i] += offlens[i]; 3109 } 3110 3111 if (!rank) { 3112 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3113 3114 /* read in my part of the matrix numerical values */ 3115 nz = procsnz[0]; 3116 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3117 3118 /* insert into matrix */ 3119 jj = rstart; 3120 smycols = mycols; 3121 svals = vals; 3122 for (i=0; i<m; i++) { 3123 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3124 smycols += ourlens[i]; 3125 svals += ourlens[i]; 3126 jj++; 3127 } 3128 3129 /* read in other processors and ship out */ 3130 for (i=1; i<size; i++) { 3131 nz = procsnz[i]; 3132 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3133 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3134 } 3135 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3136 } else { 3137 /* receive numeric values */ 3138 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3139 3140 /* receive message of values*/ 3141 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3142 3143 /* insert into matrix */ 3144 jj = rstart; 3145 smycols = mycols; 3146 svals = vals; 3147 for (i=0; i<m; i++) { 3148 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3149 smycols += ourlens[i]; 3150 svals += ourlens[i]; 3151 jj++; 3152 } 3153 } 3154 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3155 ierr = PetscFree(vals);CHKERRQ(ierr); 3156 ierr = PetscFree(mycols);CHKERRQ(ierr); 3157 ierr = PetscFree(rowners);CHKERRQ(ierr); 3158 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160 PetscFunctionReturn(0); 3161 } 3162 3163 /* Not scalable because of ISAllGather() unless getting all columns. 
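   (ISAllGather() leaves every process with a copy of the entire column index set, so the memory used grows with the global number of selected columns.)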
*/ 3164 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3165 { 3166 PetscErrorCode ierr; 3167 IS iscol_local; 3168 PetscBool isstride; 3169 PetscMPIInt lisstride=0,gisstride; 3170 3171 PetscFunctionBegin; 3172 /* check if we are grabbing all columns*/ 3173 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3174 3175 if (isstride) { 3176 PetscInt start,len,mstart,mlen; 3177 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3178 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3179 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3180 if (mstart == start && mlen-mstart == len) lisstride = 1; 3181 } 3182 3183 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3184 if (gisstride) { 3185 PetscInt N; 3186 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3187 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3188 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3189 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3190 } else { 3191 PetscInt cbs; 3192 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3193 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3194 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3195 } 3196 3197 *isseq = iscol_local; 3198 PetscFunctionReturn(0); 3199 } 3200 3201 /* 3202 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3203 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3204 3205 Input Parameters: 3206 mat - matrix 3207 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3208 i.e., mat->rstart <= isrow[i] < mat->rend 3209 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3210 i.e., mat->cstart <= iscol[i] < mat->cend 3211 Output Parameter: 3212 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3213 iscol_o - sequential column index set for retrieving mat->B 3214 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3215 */ 3216 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3217 { 3218 PetscErrorCode ierr; 3219 Vec x,cmap; 3220 const PetscInt *is_idx; 3221 PetscScalar *xarray,*cmaparray; 3222 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3223 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3224 Mat B=a->B; 3225 Vec lvec=a->lvec,lcmap; 3226 PetscInt i,cstart,cend,Bn=B->cmap->N; 3227 MPI_Comm comm; 3228 VecScatter Mvctx=a->Mvctx; 3229 3230 PetscFunctionBegin; 3231 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3232 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3233 3234 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3235 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3236 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3237 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3238 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3239 3240 /* Get start indices */ 3241 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3242 isstart -= ncols; 3243 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3244 3245 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3246 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3247 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3248 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3249 for (i=0; i<ncols; i++) { 3250 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3251 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3252 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3253 } 3254 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3255 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3256 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3257 3258 /* Get iscol_d */ 3259 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3260 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3261 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3262 3263 /* Get isrow_d */ 3264 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3265 rstart = mat->rmap->rstart; 3266 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3267 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3268 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3269 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3270 3271 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3272 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3273 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3274 3275 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3276 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3277 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3278 3279 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3280 3281 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3282 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3283 3284 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3285 /* off-process column indices */ 3286 count = 0; 3287 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3288 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3289 3290 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3291 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3292 for (i=0; i<Bn; i++) { 3293 if (PetscRealPart(xarray[i]) > -1.0) { 3294 idx[count] = i; /* local column index in off-diagonal part B */ 3295 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3296 count++; 3297 } 3298 } 3299 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3300 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3301 3302 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3303 /* cannot ensure iscol_o has same blocksize as iscol! 
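      iscol_o keeps only those entries of iscol that appear as off-process columns of B on this process,
      and such a subset need not inherit any block structure from iscol.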
*/ 3304 3305 ierr = PetscFree(idx);CHKERRQ(ierr); 3306 *garray = cmap1; 3307 3308 ierr = VecDestroy(&x);CHKERRQ(ierr); 3309 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3310 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3311 PetscFunctionReturn(0); 3312 } 3313 3314 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3315 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3316 { 3317 PetscErrorCode ierr; 3318 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3319 Mat M = NULL; 3320 MPI_Comm comm; 3321 IS iscol_d,isrow_d,iscol_o; 3322 Mat Asub = NULL,Bsub = NULL; 3323 PetscInt n; 3324 3325 PetscFunctionBegin; 3326 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3327 3328 if (call == MAT_REUSE_MATRIX) { 3329 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3330 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3331 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3332 3333 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3334 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3335 3336 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3337 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3338 3339 /* Update diagonal and off-diagonal portions of submat */ 3340 asub = (Mat_MPIAIJ*)(*submat)->data; 3341 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3342 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3343 if (n) { 3344 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3345 } 3346 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3347 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3348 3349 } else { /* call == MAT_INITIAL_MATRIX) */ 3350 const PetscInt *garray; 3351 PetscInt BsubN; 3352 3353 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
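      isrow_d and iscol_d are sequential index sets addressing the diagonal block a->A, iscol_o addresses the
      off-diagonal block a->B, and garray[i] gives the position of iscol_o[i] within iscol (see the comment
      above ISGetSeqIS_SameColDist_Private()).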
*/ 3354 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3355 3356 /* Create local submatrices Asub and Bsub */ 3357 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3358 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3359 3360 /* Create submatrix M */ 3361 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3362 3363 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3364 asub = (Mat_MPIAIJ*)M->data; 3365 3366 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3367 n = asub->B->cmap->N; 3368 if (BsubN > n) { 3369 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3370 const PetscInt *idx; 3371 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3372 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3373 3374 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3375 j = 0; 3376 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3377 for (i=0; i<n; i++) { 3378 if (j >= BsubN) break; 3379 while (subgarray[i] > garray[j]) j++; 3380 3381 if (subgarray[i] == garray[j]) { 3382 idx_new[i] = idx[j++]; 3383 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3384 } 3385 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3386 3387 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3388 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3389 3390 } else if (BsubN < n) { 3391 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3392 } 3393 3394 ierr = PetscFree(garray);CHKERRQ(ierr); 3395 *submat = M; 3396 3397 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3398 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3399 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3400 3401 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3402 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3403 3404 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3405 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3406 } 3407 PetscFunctionReturn(0); 3408 } 3409 3410 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3411 { 3412 PetscErrorCode ierr; 3413 IS iscol_local=NULL,isrow_d; 3414 PetscInt csize; 3415 PetscInt n,i,j,start,end; 3416 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3417 MPI_Comm comm; 3418 3419 PetscFunctionBegin; 3420 /* If isrow has same processor distribution as mat, 3421 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3422 if (call == MAT_REUSE_MATRIX) { 3423 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3424 if (isrow_d) { 3425 sameRowDist = PETSC_TRUE; 3426 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3427 } else { 3428 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3429 if (iscol_local) { 3430 sameRowDist = PETSC_TRUE; 3431 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3432 } 3433 } 3434 } else { 3435 /* Check if isrow has same processor distribution as mat */ 3436 sameDist[0] 
= PETSC_FALSE; 3437 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3438 if (!n) { 3439 sameDist[0] = PETSC_TRUE; 3440 } else { 3441 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3442 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3443 if (i >= start && j < end) { 3444 sameDist[0] = PETSC_TRUE; 3445 } 3446 } 3447 3448 /* Check if iscol has same processor distribution as mat */ 3449 sameDist[1] = PETSC_FALSE; 3450 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3451 if (!n) { 3452 sameDist[1] = PETSC_TRUE; 3453 } else { 3454 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3455 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3456 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3457 } 3458 3459 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3460 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3461 sameRowDist = tsameDist[0]; 3462 } 3463 3464 if (sameRowDist) { 3465 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3466 /* isrow and iscol have same processor distribution as mat */ 3467 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3468 PetscFunctionReturn(0); 3469 } else { /* sameRowDist */ 3470 /* isrow has same processor distribution as mat */ 3471 if (call == MAT_INITIAL_MATRIX) { 3472 PetscBool sorted; 3473 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3474 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3475 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3476 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3477 3478 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3479 if (sorted) { 3480 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3481 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3482 PetscFunctionReturn(0); 3483 } 3484 } else { /* call == MAT_REUSE_MATRIX */ 3485 IS iscol_sub; 3486 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3487 if (iscol_sub) { 3488 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3489 PetscFunctionReturn(0); 3490 } 3491 } 3492 } 3493 } 3494 3495 /* General case: iscol -> iscol_local which has global size of iscol */ 3496 if (call == MAT_REUSE_MATRIX) { 3497 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3498 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3499 } else { 3500 if (!iscol_local) { 3501 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3502 } 3503 } 3504 3505 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3506 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3507 3508 if (call == MAT_INITIAL_MATRIX) { 3509 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3510 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3511 } 3512 PetscFunctionReturn(0); 3513 } 3514 3515 /*@C 3516 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3517 and "off-diagonal" part of the matrix in CSR format. 3518 3519 Collective 3520 3521 Input Parameters: 3522 + comm - MPI communicator 3523 . 
A - "diagonal" portion of matrix 3524 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3525 - garray - global index of B columns 3526 3527 Output Parameter: 3528 . mat - the matrix, with input A as its local diagonal matrix 3529 Level: advanced 3530 3531 Notes: 3532 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3533 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3534 3535 .seealso: MatCreateMPIAIJWithSplitArrays() 3536 @*/ 3537 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3538 { 3539 PetscErrorCode ierr; 3540 Mat_MPIAIJ *maij; 3541 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3542 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3543 PetscScalar *oa=b->a; 3544 Mat Bnew; 3545 PetscInt m,n,N; 3546 3547 PetscFunctionBegin; 3548 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3549 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3550 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3551 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3552 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3553 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3554 3555 /* Get global columns of mat */ 3556 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3557 3558 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3559 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3560 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3561 maij = (Mat_MPIAIJ*)(*mat)->data; 3562 3563 (*mat)->preallocated = PETSC_TRUE; 3564 3565 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3566 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3567 3568 /* Set A as diagonal portion of *mat */ 3569 maij->A = A; 3570 3571 nz = oi[m]; 3572 for (i=0; i<nz; i++) { 3573 col = oj[i]; 3574 oj[i] = garray[col]; 3575 } 3576 3577 /* Set Bnew as off-diagonal portion of *mat */ 3578 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3579 bnew = (Mat_SeqAIJ*)Bnew->data; 3580 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3581 maij->B = Bnew; 3582 3583 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3584 3585 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3586 b->free_a = PETSC_FALSE; 3587 b->free_ij = PETSC_FALSE; 3588 ierr = MatDestroy(&B);CHKERRQ(ierr); 3589 3590 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3591 bnew->free_a = PETSC_TRUE; 3592 bnew->free_ij = PETSC_TRUE; 3593 3594 /* condense columns of maij->B */ 3595 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3596 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3597 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3598 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3599 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3600 PetscFunctionReturn(0); 3601 } 3602 3603 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3604 
3605 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3606 { 3607 PetscErrorCode ierr; 3608 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3609 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3610 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3611 Mat M,Msub,B=a->B; 3612 MatScalar *aa; 3613 Mat_SeqAIJ *aij; 3614 PetscInt *garray = a->garray,*colsub,Ncols; 3615 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3616 IS iscol_sub,iscmap; 3617 const PetscInt *is_idx,*cmap; 3618 PetscBool allcolumns=PETSC_FALSE; 3619 MPI_Comm comm; 3620 3621 PetscFunctionBegin; 3622 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3623 3624 if (call == MAT_REUSE_MATRIX) { 3625 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3626 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3627 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3628 3629 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3630 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3631 3632 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3633 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3634 3635 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3636 3637 } else { /* call == MAT_INITIAL_MATRIX) */ 3638 PetscBool flg; 3639 3640 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3641 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3642 3643 /* (1) iscol -> nonscalable iscol_local */ 3644 /* Check for special case: each processor gets entire matrix columns */ 3645 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3646 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3647 if (allcolumns) { 3648 iscol_sub = iscol_local; 3649 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3650 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3651 3652 } else { 3653 /* (2) iscol_local -> iscol_sub and iscmap. 
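      iscol_sub collects the global columns of mat selected on this process (the selected diagonal-block columns plus
      those off-diagonal columns that occur in garray), while iscmap records the position of each such column within
      iscol_local, i.e. its column index in the submatrix.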
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3654 PetscInt *idx,*cmap1,k; 3655 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3656 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3657 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3658 count = 0; 3659 k = 0; 3660 for (i=0; i<Ncols; i++) { 3661 j = is_idx[i]; 3662 if (j >= cstart && j < cend) { 3663 /* diagonal part of mat */ 3664 idx[count] = j; 3665 cmap1[count++] = i; /* column index in submat */ 3666 } else if (Bn) { 3667 /* off-diagonal part of mat */ 3668 if (j == garray[k]) { 3669 idx[count] = j; 3670 cmap1[count++] = i; /* column index in submat */ 3671 } else if (j > garray[k]) { 3672 while (j > garray[k] && k < Bn-1) k++; 3673 if (j == garray[k]) { 3674 idx[count] = j; 3675 cmap1[count++] = i; /* column index in submat */ 3676 } 3677 } 3678 } 3679 } 3680 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3681 3682 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3683 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3684 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3685 3686 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3687 } 3688 3689 /* (3) Create sequential Msub */ 3690 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3691 } 3692 3693 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3694 aij = (Mat_SeqAIJ*)(Msub)->data; 3695 ii = aij->i; 3696 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3697 3698 /* 3699 m - number of local rows 3700 Ncols - number of columns (same on all processors) 3701 rstart - first row in new global matrix generated 3702 */ 3703 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3704 3705 if (call == MAT_INITIAL_MATRIX) { 3706 /* (4) Create parallel newmat */ 3707 PetscMPIInt rank,size; 3708 PetscInt csize; 3709 3710 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3711 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3712 3713 /* 3714 Determine the number of non-zeros in the diagonal and off-diagonal 3715 portions of the matrix in order to do correct preallocation 3716 */ 3717 3718 /* first get start and end of "diagonal" columns */ 3719 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3720 if (csize == PETSC_DECIDE) { 3721 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3722 if (mglobal == Ncols) { /* square matrix */ 3723 nlocal = m; 3724 } else { 3725 nlocal = Ncols/size + ((Ncols % size) > rank); 3726 } 3727 } else { 3728 nlocal = csize; 3729 } 3730 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3731 rstart = rend - nlocal; 3732 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3733 3734 /* next, compute all the lengths */ 3735 jj = aij->j; 3736 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3737 olens = dlens + m; 3738 for (i=0; i<m; i++) { 3739 jend = ii[i+1] - ii[i]; 3740 olen = 0; 3741 dlen = 0; 3742 for (j=0; j<jend; j++) { 3743 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3744 else dlen++; 3745 jj++; 3746 } 3747 olens[i] = olen; 3748 dlens[i] = dlen; 3749 } 3750 3751 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3752 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3753 3754 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3755 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
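      /* the parallel submatrix M inherits the block sizes of isrow/iscol and the type of mat, and is preallocated
         with the dlens/olens row lengths computed above */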
3756 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3757 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3758 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3759 ierr = PetscFree(dlens);CHKERRQ(ierr); 3760 3761 } else { /* call == MAT_REUSE_MATRIX */ 3762 M = *newmat; 3763 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3764 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3765 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3766 /* 3767 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3768 rather than the slower MatSetValues(). 3769 */ 3770 M->was_assembled = PETSC_TRUE; 3771 M->assembled = PETSC_FALSE; 3772 } 3773 3774 /* (5) Set values of Msub to *newmat */ 3775 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3776 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3777 3778 jj = aij->j; 3779 aa = aij->a; 3780 for (i=0; i<m; i++) { 3781 row = rstart + i; 3782 nz = ii[i+1] - ii[i]; 3783 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3784 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3785 jj += nz; aa += nz; 3786 } 3787 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3788 3789 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3790 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3791 3792 ierr = PetscFree(colsub);CHKERRQ(ierr); 3793 3794 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3795 if (call == MAT_INITIAL_MATRIX) { 3796 *newmat = M; 3797 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3798 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3799 3800 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3801 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3802 3803 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3804 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3805 3806 if (iscol_local) { 3807 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3808 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3809 } 3810 } 3811 PetscFunctionReturn(0); 3812 } 3813 3814 /* 3815 Not great since it makes two copies of the submatrix, first an SeqAIJ 3816 in local and then by concatenating the local matrices the end result. 3817 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3818 3819 Note: This requires a sequential iscol with all indices. 
3820 */ 3821 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3822 { 3823 PetscErrorCode ierr; 3824 PetscMPIInt rank,size; 3825 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3826 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3827 Mat M,Mreuse; 3828 MatScalar *aa,*vwork; 3829 MPI_Comm comm; 3830 Mat_SeqAIJ *aij; 3831 PetscBool colflag,allcolumns=PETSC_FALSE; 3832 3833 PetscFunctionBegin; 3834 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3835 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3836 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3837 3838 /* Check for special case: each processor gets entire matrix columns */ 3839 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3840 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3841 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3842 3843 if (call == MAT_REUSE_MATRIX) { 3844 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3845 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3846 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3847 } else { 3848 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3849 } 3850 3851 /* 3852 m - number of local rows 3853 n - number of columns (same on all processors) 3854 rstart - first row in new global matrix generated 3855 */ 3856 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3857 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3858 if (call == MAT_INITIAL_MATRIX) { 3859 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3860 ii = aij->i; 3861 jj = aij->j; 3862 3863 /* 3864 Determine the number of non-zeros in the diagonal and off-diagonal 3865 portions of the matrix in order to do correct preallocation 3866 */ 3867 3868 /* first get start and end of "diagonal" columns */ 3869 if (csize == PETSC_DECIDE) { 3870 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3871 if (mglobal == n) { /* square matrix */ 3872 nlocal = m; 3873 } else { 3874 nlocal = n/size + ((n % size) > rank); 3875 } 3876 } else { 3877 nlocal = csize; 3878 } 3879 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3880 rstart = rend - nlocal; 3881 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3882 3883 /* next, compute all the lengths */ 3884 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3885 olens = dlens + m; 3886 for (i=0; i<m; i++) { 3887 jend = ii[i+1] - ii[i]; 3888 olen = 0; 3889 dlen = 0; 3890 for (j=0; j<jend; j++) { 3891 if (*jj < rstart || *jj >= rend) olen++; 3892 else dlen++; 3893 jj++; 3894 } 3895 olens[i] = olen; 3896 dlens[i] = dlen; 3897 } 3898 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3899 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3900 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3901 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3902 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3903 ierr = PetscFree(dlens);CHKERRQ(ierr); 3904 } else { 3905 PetscInt ml,nl; 3906 3907 M = *newmat; 3908 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3909 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3910 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3911 /* 3912 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3913 rather than the slower MatSetValues(). 3914 */ 3915 M->was_assembled = PETSC_TRUE; 3916 M->assembled = PETSC_FALSE; 3917 } 3918 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3919 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3920 ii = aij->i; 3921 jj = aij->j; 3922 aa = aij->a; 3923 for (i=0; i<m; i++) { 3924 row = rstart + i; 3925 nz = ii[i+1] - ii[i]; 3926 cwork = jj; jj += nz; 3927 vwork = aa; aa += nz; 3928 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3929 } 3930 3931 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 *newmat = M; 3934 3935 /* save submatrix used in processor for next request */ 3936 if (call == MAT_INITIAL_MATRIX) { 3937 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3938 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3939 } 3940 PetscFunctionReturn(0); 3941 } 3942 3943 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3944 { 3945 PetscInt m,cstart, cend,j,nnz,i,d; 3946 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3947 const PetscInt *JJ; 3948 PetscErrorCode ierr; 3949 PetscBool nooffprocentries; 3950 3951 PetscFunctionBegin; 3952 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3953 3954 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3955 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3956 m = B->rmap->n; 3957 cstart = B->cmap->rstart; 3958 cend = B->cmap->rend; 3959 rstart = B->rmap->rstart; 3960 3961 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3962 3963 #if defined(PETSC_USE_DEBUG) 3964 for (i=0; i<m; i++) { 3965 nnz = Ii[i+1]- Ii[i]; 3966 JJ = J + Ii[i]; 3967 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3968 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3969 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3970 } 3971 #endif 3972 3973 for (i=0; i<m; i++) { 3974 nnz = Ii[i+1]- Ii[i]; 3975 JJ = J + Ii[i]; 3976 nnz_max = PetscMax(nnz_max,nnz); 3977 d = 0; 3978 for (j=0; j<nnz; j++) { 3979 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3980 } 3981 d_nnz[i] = d; 3982 o_nnz[i] = nnz - d; 3983 } 3984 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3985 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3986 3987 for (i=0; i<m; i++) { 3988 ii = i + rstart; 3989 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3990 } 3991 nooffprocentries = B->nooffprocentries; 3992 B->nooffprocentries = PETSC_TRUE; 3993 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3994 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3995 B->nooffprocentries = nooffprocentries; 3996 3997 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3998 PetscFunctionReturn(0); 3999 } 4000 4001 /*@ 4002 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4003 (the default parallel PETSc format). 
4004 4005 Collective 4006 4007 Input Parameters: 4008 + B - the matrix 4009 . i - the indices into j for the start of each local row (starts with zero) 4010 . j - the column indices for each local row (starts with zero) 4011 - v - optional values in the matrix 4012 4013 Level: developer 4014 4015 Notes: 4016 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4017 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4018 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4019 4020 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4021 4022 The format which is used for the sparse matrix input, is equivalent to a 4023 row-major ordering.. i.e for the following matrix, the input data expected is 4024 as shown 4025 4026 $ 1 0 0 4027 $ 2 0 3 P0 4028 $ ------- 4029 $ 4 5 6 P1 4030 $ 4031 $ Process0 [P0]: rows_owned=[0,1] 4032 $ i = {0,1,3} [size = nrow+1 = 2+1] 4033 $ j = {0,0,2} [size = 3] 4034 $ v = {1,2,3} [size = 3] 4035 $ 4036 $ Process1 [P1]: rows_owned=[2] 4037 $ i = {0,3} [size = nrow+1 = 1+1] 4038 $ j = {0,1,2} [size = 3] 4039 $ v = {4,5,6} [size = 3] 4040 4041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4042 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4043 @*/ 4044 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4045 { 4046 PetscErrorCode ierr; 4047 4048 PetscFunctionBegin; 4049 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4050 PetscFunctionReturn(0); 4051 } 4052 4053 /*@C 4054 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4055 (the default parallel PETSc format). For good matrix assembly performance 4056 the user should preallocate the matrix storage by setting the parameters 4057 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4058 performance can be increased by more than a factor of 50. 4059 4060 Collective 4061 4062 Input Parameters: 4063 + B - the matrix 4064 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4065 (same value is used for all local rows) 4066 . d_nnz - array containing the number of nonzeros in the various rows of the 4067 DIAGONAL portion of the local submatrix (possibly different for each row) 4068 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4069 The size of this array is equal to the number of local rows, i.e 'm'. 4070 For matrices that will be factored, you must leave room for (and set) 4071 the diagonal entry even if it is zero. 4072 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4073 submatrix (same value is used for all local rows). 4074 - o_nnz - array containing the number of nonzeros in the various rows of the 4075 OFF-DIAGONAL portion of the local submatrix (possibly different for 4076 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4077 structure. The size of this array is equal to the number 4078 of local rows, i.e 'm'. 
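   A typical call sequence (a sketch; m,n,M,N and the d_nnz/o_nnz arrays stand for the user's own values) is
$     MatCreate(comm,&B);
$     MatSetSizes(B,m,n,M,N);
$     MatSetType(B,MATMPIAIJ);
$     MatSetFromOptions(B);
$     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);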
4079 4080 If the *_nnz parameter is given then the *_nz parameter is ignored
4081 4082 The AIJ format (also called the Yale sparse matrix format or 4083 compressed row storage (CSR)), is fully compatible with standard Fortran 77
4084 storage. The stored row and column indices begin with zero. 4085 See Users-Manual: ch_mat for details.
4086 4087 The parallel matrix is partitioned such that the first m0 rows belong to 4088 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4089 to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4090 4091 The DIAGONAL portion of the local submatrix of a processor can be defined 4092 as the submatrix which is obtained by extracting the part corresponding to 4093 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4094 first row that belongs to the processor, r2 is the last row belonging to 4095 this processor, and c1-c2 is the range of indices of the local part of a 4096 vector suitable for applying the matrix to. This is an mxn matrix. In the 4097 common case of a square matrix, the row and column ranges are the same and 4098 the DIAGONAL part is also square. The remaining portion of the local 4099 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4100 4101 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4102 4103 You can call MatGetInfo() to get information on how effective the preallocation was; 4104 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4105 You can also run with the option -info and look for messages with the string 4106 malloc in them to see if additional memory allocation was needed.
4107 4108 Example usage:
4109 4110 Consider the following 8x8 matrix with 34 non-zero values, that is 4111 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4112 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4113 as follows:
4114 4115 .vb 4116 1 2 0 | 0 3 0 | 0 4 4117 Proc0 0 5 6 | 7 0 0 | 8 0 4118 9 0 10 | 11 0 0 | 12 0 4119 ------------------------------------- 4120 13 0 14 | 15 16 17 | 0 0 4121 Proc1 0 18 0 | 19 20 21 | 0 0 4122 0 0 0 | 22 23 0 | 24 0 4123 ------------------------------------- 4124 Proc2 25 26 27 | 0 0 28 | 29 0 4125 30 0 0 | 31 32 33 | 0 34 4126 .ve
4127 4128 This can be represented as a collection of submatrices as:
4129 4130 .vb 4131 A B C 4132 D E F 4133 G H I 4134 .ve
4135 4136 Where the submatrices A,B,C are owned by proc0, D,E,F are 4137 owned by proc1, G,H,I are owned by proc2.
4138 4139 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4140 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4141 The 'M','N' parameters are 8,8, and have the same values on all procs.
4142 4143 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4144 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4145 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4146 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4147 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4148 matrix, and [DF] as another SeqAIJ matrix.
4149 4150 When d_nz, o_nz parameters are specified, d_nz storage elements are 4151 allocated for every row of the local diagonal submatrix, and o_nz 4152 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4153 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4154 row of the local DIAGONAL and OFF-DIAGONAL submatrices.
4155 In this case, the values of d_nz,o_nz are:
4156 .vb 4157 proc0 : d_nz = 2, o_nz = 2 4158 proc1 : d_nz = 3, o_nz = 2 4159 proc2 : d_nz = 1, o_nz = 4 4160 .ve
4161 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4162 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4163 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4164 34 values.
4165 4166 When d_nnz, o_nnz parameters are specified, the storage is specified 4167 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4168 In the above case the values for d_nnz,o_nnz are:
4169 .vb 4170 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4171 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4172 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4173 .ve
4174 Here the space allocated is the sum of all the above values, i.e. 34, and 4175 hence pre-allocation is perfect.
4176 4177 Level: intermediate
4178 4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4180 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4181 @*/
4182 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4183 { 4184 PetscErrorCode ierr; 4185 4186 PetscFunctionBegin; 4187 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4188 PetscValidType(B,1); 4189 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4190 PetscFunctionReturn(0); 4191 }
4192 4193 /*@
4194 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4195 CSR format.
4196 4197 Collective
4198 4199 Input Parameters:
4200 + comm - MPI communicator
4201 . m - number of local rows (Cannot be PETSC_DECIDE)
4202 . n - This value should be the same as the local size used in creating the 4203 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4204 calculated if N is given) For square matrices n is almost always m.
4205 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4206 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4207 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4208 . j - column indices
4209 - a - matrix values
4210 4211 Output Parameter:
4212 . mat - the matrix
4213 4214 Level: intermediate
4215 4216 Notes:
4217 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4218 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4219 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4220 4221 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4222 4223 The format which is used for the sparse matrix input is equivalent to a 4224 row-major ordering,
i.e., for the following matrix the input data expected is 4225 as shown below.
4226 4227 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4228 4229 $ 1 0 0 4230 $ 2 0 3 P0 4231 $ ------- 4232 $ 4 5 6 P1 4233 $
4234 $ Process0 [P0]: rows_owned=[0,1] 4235 $ i = {0,1,3} [size = nrow+1 = 2+1] 4236 $ j = {0,0,2} [size = 3] 4237 $ v = {1,2,3} [size = 3] 4238 $
4239 $ Process1 [P1]: rows_owned=[2] 4240 $ i = {0,3} [size = nrow+1 = 1+1] 4241 $ j = {0,1,2} [size = 3] 4242 $ v = {4,5,6} [size = 3]
4243 4244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4245 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4246 @*/
4247 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4248 { 4249 PetscErrorCode ierr; 4250 4251 PetscFunctionBegin; 4252 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4253 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4254 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4255 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4256 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4257 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4258 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4259 PetscFunctionReturn(0); 4260 }
4261 4262 /*@
4263 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4264 CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created.
4265 4266 Collective
4267 4268 Input Parameters:
4269 + mat - the matrix
4270 . m - number of local rows (Cannot be PETSC_DECIDE)
4271 . n - This value should be the same as the local size used in creating the 4272 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4273 calculated if N is given) For square matrices n is almost always m.
4274 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4275 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4276 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4277 .
J - column indices
4278 - v - matrix values
4279 4280 Level: intermediate
4281 4282 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4283 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4284 @*/
4285 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4286 { 4287 PetscErrorCode ierr; 4288 PetscInt cstart,nnz,i,j; 4289 PetscInt *ld; 4290 PetscBool nooffprocentries; 4291 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4292 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4293 PetscScalar *ad = Ad->a, *ao = Ao->a; 4294 const PetscInt *Adi = Ad->i; 4295 PetscInt ldi,Iii,md;
4296 4297 PetscFunctionBegin;
4298 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0");
4299 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4300 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4301 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4302 4303 cstart = mat->cmap->rstart;
4304 if (!Aij->ld) { 4305 /* count number of entries below block diagonal */ 4306 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4307 Aij->ld = ld; 4308 for (i=0; i<m; i++) { 4309 nnz = Ii[i+1]- Ii[i]; 4310 j = 0; 4311 while (j < nnz && J[j] < cstart) {j++;} /* check j < nnz first so J[] is never read past the end of the row */ 4312 J += nnz; 4313 ld[i] = j; 4314 } 4315 } else { 4316 ld = Aij->ld; 4317 }
4318 4319 for (i=0; i<m; i++) { 4320 nnz = Ii[i+1]- Ii[i]; 4321 Iii = Ii[i]; 4322 ldi = ld[i]; 4323 md = Adi[i+1]-Adi[i]; 4324 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4325 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4326 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4327 ad += md; 4328 ao += nnz - md; 4329 }
4330 nooffprocentries = mat->nooffprocentries; 4331 mat->nooffprocentries = PETSC_TRUE; 4332 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4333 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4334 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4335 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4336 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4337 mat->nooffprocentries = nooffprocentries; 4338 PetscFunctionReturn(0); 4339 }
4340 4341 /*@C
4342 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4343 (the default parallel PETSc format). For good matrix assembly performance 4344 the user should preallocate the matrix storage by setting the parameters 4345 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4346 performance can be increased by more than a factor of 50.
4347 4348 Collective
4349 4350 Input Parameters:
4351 + comm - MPI communicator
4352 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4353 This value should be the same as the local size used in creating the 4354 y vector for the matrix-vector product y = Ax.
4355 . n - This value should be the same as the local size used in creating the 4356 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4357 calculated if N is given) For square matrices n is almost always m.
4358 .
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4359 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4360 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4361 (same value is used for all local rows) 4362 . d_nnz - array containing the number of nonzeros in the various rows of the 4363 DIAGONAL portion of the local submatrix (possibly different for each row) 4364 or NULL, if d_nz is used to specify the nonzero structure. 4365 The size of this array is equal to the number of local rows, i.e 'm'. 4366 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4367 submatrix (same value is used for all local rows). 4368 - o_nnz - array containing the number of nonzeros in the various rows of the 4369 OFF-DIAGONAL portion of the local submatrix (possibly different for 4370 each row) or NULL, if o_nz is used to specify the nonzero 4371 structure. The size of this array is equal to the number 4372 of local rows, i.e 'm'. 4373 4374 Output Parameter: 4375 . A - the matrix 4376 4377 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4378 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4379 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4380 4381 Notes: 4382 If the *_nnz parameter is given then the *_nz parameter is ignored 4383 4384 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4385 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4386 storage requirements for this matrix. 4387 4388 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4389 processor than it must be used on all processors that share the object for 4390 that argument. 4391 4392 The user MUST specify either the local or global matrix dimensions 4393 (possibly both). 4394 4395 The parallel matrix is partitioned across processors such that the 4396 first m0 rows belong to process 0, the next m1 rows belong to 4397 process 1, the next m2 rows belong to process 2 etc.. where 4398 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4399 values corresponding to [m x N] submatrix. 4400 4401 The columns are logically partitioned with the n0 columns belonging 4402 to 0th partition, the next n1 columns belonging to the next 4403 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4404 4405 The DIAGONAL portion of the local submatrix on any given processor 4406 is the submatrix corresponding to the rows and columns m,n 4407 corresponding to the given processor. i.e diagonal matrix on 4408 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4409 etc. The remaining portion of the local submatrix [m x (N-n)] 4410 constitute the OFF-DIAGONAL portion. The example below better 4411 illustrates this concept. 4412 4413 For a square global matrix we define each processor's diagonal portion 4414 to be its local rows and the corresponding columns (a square submatrix); 4415 each processor's off-diagonal portion encompasses the remainder of the 4416 local matrix (a rectangular submatrix). 4417 4418 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4419 4420 When calling this routine with a single process communicator, a matrix of 4421 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the
   local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
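   A minimal calling-sequence sketch, using the proc0 values from the example above (illustrative only;
   each process passes its own m, n, d_nnz and o_nnz, and the remaining entries are inserted with further
   MatSetValues() calls before assembly):

.vb
     Mat            A;
     PetscInt       d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     PetscInt       row = 0, col = 0;
     PetscScalar    v   = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve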
4510 4511 Level: intermediate 4512 4513 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4514 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4515 @*/ 4516 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4517 { 4518 PetscErrorCode ierr; 4519 PetscMPIInt size; 4520 4521 PetscFunctionBegin; 4522 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4523 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4525 if (size > 1) { 4526 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4527 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4528 } else { 4529 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4530 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4531 } 4532 PetscFunctionReturn(0); 4533 } 4534 4535 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4536 { 4537 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4538 PetscBool flg; 4539 PetscErrorCode ierr; 4540 4541 PetscFunctionBegin; 4542 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4543 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4544 if (Ad) *Ad = a->A; 4545 if (Ao) *Ao = a->B; 4546 if (colmap) *colmap = a->garray; 4547 PetscFunctionReturn(0); 4548 } 4549 4550 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4551 { 4552 PetscErrorCode ierr; 4553 PetscInt m,N,i,rstart,nnz,Ii; 4554 PetscInt *indx; 4555 PetscScalar *values; 4556 4557 PetscFunctionBegin; 4558 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4559 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4560 PetscInt *dnz,*onz,sum,bs,cbs; 4561 4562 if (n == PETSC_DECIDE) { 4563 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4564 } 4565 /* Check sum(n) = N */ 4566 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4567 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4568 4569 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4570 rstart -= m; 4571 4572 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4573 for (i=0; i<m; i++) { 4574 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4575 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4576 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4577 } 4578 4579 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4580 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4581 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4582 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4583 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4584 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4585 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4586 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4587 } 4588 4589 /* numeric phase */ 4590 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4591 for (i=0; i<m; i++) { 4592 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4593 Ii = i + rstart; 4594 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4595 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4596 } 4597 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4598 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4599 PetscFunctionReturn(0); 4600 } 4601 4602 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4603 { 4604 PetscErrorCode ierr; 4605 PetscMPIInt rank; 4606 PetscInt m,N,i,rstart,nnz; 4607 size_t len; 4608 const PetscInt *indx; 4609 PetscViewer out; 4610 char *name; 4611 Mat B; 4612 const PetscScalar *values; 4613 4614 PetscFunctionBegin; 4615 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4616 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4617 /* Should this be the type of the diagonal block of A? */ 4618 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4619 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4620 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4621 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4622 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4623 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4624 for (i=0; i<m; i++) { 4625 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4626 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4627 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4628 } 4629 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4630 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4631 4632 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4633 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4634 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4635 sprintf(name,"%s.%d",outfile,rank); 4636 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4637 ierr = PetscFree(name);CHKERRQ(ierr); 4638 ierr = MatView(B,out);CHKERRQ(ierr); 4639 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4640 ierr = MatDestroy(&B);CHKERRQ(ierr); 4641 PetscFunctionReturn(0); 4642 } 4643 4644 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4645 { 4646 PetscErrorCode ierr; 4647 Mat_Merge_SeqsToMPI *merge; 4648 PetscContainer container; 4649 4650 PetscFunctionBegin; 4651 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4652 if (container) { 4653 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4654 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4655 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4656 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4657 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4658 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4659 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4660 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4661 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4662 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4663 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4664 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4665 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4666 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4667 ierr = PetscFree(merge);CHKERRQ(ierr); 4668 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4669 } 4670 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4671 PetscFunctionReturn(0); 4672 } 4673 4674 #include <../src/mat/utils/freespace.h> 4675 #include <petscbt.h> 4676 4677 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4678 { 4679 PetscErrorCode ierr; 4680 MPI_Comm comm; 4681 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4682 PetscMPIInt 
size,rank,taga,*len_s; 4683 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4684 PetscInt proc,m; 4685 PetscInt **buf_ri,**buf_rj; 4686 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4687 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4688 MPI_Request *s_waits,*r_waits; 4689 MPI_Status *status; 4690 MatScalar *aa=a->a; 4691 MatScalar **abuf_r,*ba_i; 4692 Mat_Merge_SeqsToMPI *merge; 4693 PetscContainer container; 4694 4695 PetscFunctionBegin; 4696 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4697 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4698 4699 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4700 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4701 4702 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4703 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4704 4705 bi = merge->bi; 4706 bj = merge->bj; 4707 buf_ri = merge->buf_ri; 4708 buf_rj = merge->buf_rj; 4709 4710 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4711 owners = merge->rowmap->range; 4712 len_s = merge->len_s; 4713 4714 /* send and recv matrix values */ 4715 /*-----------------------------*/ 4716 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4717 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4718 4719 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4720 for (proc=0,k=0; proc<size; proc++) { 4721 if (!len_s[proc]) continue; 4722 i = owners[proc]; 4723 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4724 k++; 4725 } 4726 4727 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4728 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4729 ierr = PetscFree(status);CHKERRQ(ierr); 4730 4731 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4732 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4733 4734 /* insert mat values of mpimat */ 4735 /*----------------------------*/ 4736 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4737 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4738 4739 for (k=0; k<merge->nrecv; k++) { 4740 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4741 nrows = *(buf_ri_k[k]); 4742 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4743 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4744 } 4745 4746 /* set values of ba */ 4747 m = merge->rowmap->n; 4748 for (i=0; i<m; i++) { 4749 arow = owners[rank] + i; 4750 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4751 bnzi = bi[i+1] - bi[i]; 4752 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4753 4754 /* add local non-zero vals of this proc's seqmat into ba */ 4755 anzi = ai[arow+1] - ai[arow]; 4756 aj = a->j + ai[arow]; 4757 aa = a->a + ai[arow]; 4758 nextaj = 0; 4759 for (j=0; nextaj<anzi; j++) { 4760 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4761 ba_i[j] += aa[nextaj++]; 4762 } 4763 } 4764 4765 /* add received vals into ba */ 4766 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4767 /* i-th row */ 4768 if (i == *nextrow[k]) { 4769 anzi = *(nextai[k]+1) - *nextai[k]; 4770 aj = buf_rj[k] + *(nextai[k]); 4771 aa = abuf_r[k] + *(nextai[k]); 4772 nextaj = 0; 4773 for (j=0; nextaj<anzi; j++) { 4774 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4775 
ba_i[j] += aa[nextaj++]; 4776 } 4777 } 4778 nextrow[k]++; nextai[k]++; 4779 } 4780 } 4781 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4782 } 4783 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4784 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4785 4786 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4787 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4788 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4789 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4790 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4791 PetscFunctionReturn(0); 4792 } 4793 4794 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4795 { 4796 PetscErrorCode ierr; 4797 Mat B_mpi; 4798 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4799 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4800 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4801 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4802 PetscInt len,proc,*dnz,*onz,bs,cbs; 4803 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4804 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4805 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4806 MPI_Status *status; 4807 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4808 PetscBT lnkbt; 4809 Mat_Merge_SeqsToMPI *merge; 4810 PetscContainer container; 4811 4812 PetscFunctionBegin; 4813 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4814 4815 /* make sure it is a PETSc comm */ 4816 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4817 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4818 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4819 4820 ierr = PetscNew(&merge);CHKERRQ(ierr); 4821 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4822 4823 /* determine row ownership */ 4824 /*---------------------------------------------------------*/ 4825 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4826 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4827 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4828 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4829 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4830 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4831 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4832 4833 m = merge->rowmap->n; 4834 owners = merge->rowmap->range; 4835 4836 /* determine the number of messages to send, their lengths */ 4837 /*---------------------------------------------------------*/ 4838 len_s = merge->len_s; 4839 4840 len = 0; /* length of buf_si[] */ 4841 merge->nsend = 0; 4842 for (proc=0; proc<size; proc++) { 4843 len_si[proc] = 0; 4844 if (proc == rank) { 4845 len_s[proc] = 0; 4846 } else { 4847 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4848 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4849 } 4850 if (len_s[proc]) { 4851 merge->nsend++; 4852 nrows = 0; 4853 for (i=owners[proc]; i<owners[proc+1]; i++) { 4854 if (ai[i+1] > ai[i]) nrows++; 4855 } 4856 len_si[proc] = 2*(nrows+1); 4857 len += len_si[proc]; 4858 } 4859 } 4860 4861 /* determine the number and length of messages to receive for ij-structure */ 4862 /*-------------------------------------------------------------------------*/ 4863 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4864 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4865 4866 /* post the Irecv of j-structure */ 4867 /*-------------------------------*/ 4868 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4869 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4870 4871 /* post the Isend of j-structure */ 4872 /*--------------------------------*/ 4873 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4874 4875 for (proc=0, k=0; proc<size; proc++) { 4876 if (!len_s[proc]) continue; 4877 i = owners[proc]; 4878 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4879 k++; 4880 } 4881 4882 /* receives and sends of j-structure are complete */ 4883 /*------------------------------------------------*/ 4884 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4885 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4886 4887 /* send and recv i-structure */ 4888 /*---------------------------*/ 4889 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4890 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4891 4892 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4893 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4894 for (proc=0,k=0; proc<size; proc++) { 4895 if (!len_s[proc]) continue; 4896 /* form outgoing message for i-structure: 4897 buf_si[0]: nrows to be sent 4898 [1:nrows]: row index (global) 4899 [nrows+1:2*nrows+1]: i-structure index 4900 */ 4901 /*-------------------------------------------*/ 4902 nrows = len_si[proc]/2 - 1; 4903 buf_si_i = buf_si + nrows+1; 4904 buf_si[0] = nrows; 4905 buf_si_i[0] = 0; 4906 nrows = 0; 4907 for (i=owners[proc]; i<owners[proc+1]; i++) { 4908 anzi = ai[i+1] - ai[i]; 4909 if (anzi) { 4910 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4911 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4912 nrows++; 4913 } 4914 } 4915 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4916 k++; 4917 buf_si += len_si[proc]; 4918 } 4919 4920 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4921 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4922 4923 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4924 for (i=0; i<merge->nrecv; i++) { 4925 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4926 } 4927 4928 ierr = PetscFree(len_si);CHKERRQ(ierr); 4929 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4930 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4931 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4932 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4933 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4934 ierr = PetscFree(status);CHKERRQ(ierr); 4935 4936 /* compute a local seq matrix in each processor */ 4937 /*----------------------------------------------*/ 4938 /* allocate bi array and free space for accumulating nonzero column info */ 4939 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4940 bi[0] = 0; 4941 4942 /* create and initialize a linked list */ 4943 nlnk = N+1; 4944 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4945 4946 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4947 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4948 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4949 4950 current_space = free_space; 4951 4952 /* determine symbolic info for each local row */ 4953 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4954 4955 for (k=0; k<merge->nrecv; k++) { 4956 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4957 nrows = *buf_ri_k[k]; 4958 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4959 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4960 } 4961 4962 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4963 len = 0; 4964 for (i=0; i<m; i++) { 4965 bnzi = 0; 4966 /* add local non-zero cols of this proc's seqmat into lnk */ 4967 arow = owners[rank] + i; 4968 anzi = ai[arow+1] - ai[arow]; 4969 aj = a->j + ai[arow]; 4970 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4971 bnzi += nlnk; 4972 /* add received col data into lnk */ 4973 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4974 if (i == *nextrow[k]) { /* i-th row */ 4975 anzi = *(nextai[k]+1) - *nextai[k]; 4976 aj = buf_rj[k] + *nextai[k]; 4977 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4978 bnzi += nlnk; 4979 nextrow[k]++; nextai[k]++; 4980 } 4981 } 4982 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4983 4984 /* if free space is not available, make more free space */ 4985 if (current_space->local_remaining<bnzi) { 4986 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4987 nspacedouble++; 4988 } 4989 /* copy data into free space, then initialize lnk */ 4990 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4991 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4992 4993 current_space->array += bnzi; 4994 current_space->local_used += bnzi; 4995 current_space->local_remaining -= bnzi; 4996 4997 bi[i+1] = bi[i] + bnzi; 4998 } 4999 5000 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5001 5002 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5003 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5004 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5005 5006 /* create symbolic parallel matrix B_mpi */ 5007 /*---------------------------------------*/ 5008 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5009 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5010 if (n==PETSC_DECIDE) { 5011 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5012 } else { 5013 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5014 } 5015 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5016 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5017 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5018 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5019 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5020 5021 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5022 B_mpi->assembled = PETSC_FALSE; 5023 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5024 merge->bi = bi; 5025 merge->bj = bj; 5026 merge->buf_ri = buf_ri; 5027 merge->buf_rj = buf_rj; 5028 merge->coi = NULL; 5029 merge->coj = NULL; 5030 merge->owners_co = NULL; 5031 5032 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5033 5034 /* attach the 
supporting struct to B_mpi for reuse */ 5035 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5036 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5037 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5038 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5039 *mpimat = B_mpi; 5040 5041 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5042 PetscFunctionReturn(0); 5043 } 5044 5045 /*@C 5046 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5047 matrices from each processor 5048 5049 Collective 5050 5051 Input Parameters: 5052 + comm - the communicators the parallel matrix will live on 5053 . seqmat - the input sequential matrices 5054 . m - number of local rows (or PETSC_DECIDE) 5055 . n - number of local columns (or PETSC_DECIDE) 5056 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5057 5058 Output Parameter: 5059 . mpimat - the parallel matrix generated 5060 5061 Level: advanced 5062 5063 Notes: 5064 The dimensions of the sequential matrix in each processor MUST be the same. 5065 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5066 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5067 @*/ 5068 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5069 { 5070 PetscErrorCode ierr; 5071 PetscMPIInt size; 5072 5073 PetscFunctionBegin; 5074 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5075 if (size == 1) { 5076 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5077 if (scall == MAT_INITIAL_MATRIX) { 5078 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5079 } else { 5080 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5081 } 5082 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5083 PetscFunctionReturn(0); 5084 } 5085 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5086 if (scall == MAT_INITIAL_MATRIX) { 5087 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5088 } 5089 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5090 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5091 PetscFunctionReturn(0); 5092 } 5093 5094 /*@ 5095 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5096 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5097 with MatGetSize() 5098 5099 Not Collective 5100 5101 Input Parameters: 5102 + A - the matrix 5103 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 5105 Output Parameter: 5106 . 
A_loc - the local sequential matrix generated 5107 5108 Level: developer 5109 5110 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5111 5112 @*/ 5113 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5114 { 5115 PetscErrorCode ierr; 5116 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5117 Mat_SeqAIJ *mat,*a,*b; 5118 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5119 MatScalar *aa,*ba,*cam; 5120 PetscScalar *ca; 5121 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5122 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5123 PetscBool match; 5124 MPI_Comm comm; 5125 PetscMPIInt size; 5126 5127 PetscFunctionBegin; 5128 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5129 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5130 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5131 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5132 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5133 5134 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5135 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5136 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5137 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5138 aa = a->a; ba = b->a; 5139 if (scall == MAT_INITIAL_MATRIX) { 5140 if (size == 1) { 5141 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5142 PetscFunctionReturn(0); 5143 } 5144 5145 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5146 ci[0] = 0; 5147 for (i=0; i<am; i++) { 5148 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5149 } 5150 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5151 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5152 k = 0; 5153 for (i=0; i<am; i++) { 5154 ncols_o = bi[i+1] - bi[i]; 5155 ncols_d = ai[i+1] - ai[i]; 5156 /* off-diagonal portion of A */ 5157 for (jo=0; jo<ncols_o; jo++) { 5158 col = cmap[*bj]; 5159 if (col >= cstart) break; 5160 cj[k] = col; bj++; 5161 ca[k++] = *ba++; 5162 } 5163 /* diagonal portion of A */ 5164 for (j=0; j<ncols_d; j++) { 5165 cj[k] = cstart + *aj++; 5166 ca[k++] = *aa++; 5167 } 5168 /* off-diagonal portion of A */ 5169 for (j=jo; j<ncols_o; j++) { 5170 cj[k] = cmap[*bj++]; 5171 ca[k++] = *ba++; 5172 } 5173 } 5174 /* put together the new matrix */ 5175 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5176 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5177 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5178 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5179 mat->free_a = PETSC_TRUE; 5180 mat->free_ij = PETSC_TRUE; 5181 mat->nonew = 0; 5182 } else if (scall == MAT_REUSE_MATRIX) { 5183 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5184 ci = mat->i; cj = mat->j; cam = mat->a; 5185 for (i=0; i<am; i++) { 5186 /* off-diagonal portion of A */ 5187 ncols_o = bi[i+1] - bi[i]; 5188 for (jo=0; jo<ncols_o; jo++) { 5189 col = cmap[*bj]; 5190 if (col >= cstart) break; 5191 *cam++ = *ba++; bj++; 5192 } 5193 /* diagonal portion of A */ 5194 ncols_d = ai[i+1] - ai[i]; 5195 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5196 /* off-diagonal portion of A */ 5197 for (j=jo; j<ncols_o; j++) { 5198 *cam++ = *ba++; bj++; 5199 } 5200 } 5201 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5202 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5203 PetscFunctionReturn(0); 5204 } 5205 5206 /*@C 5207 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5208 5209 Not Collective 5210 5211 Input Parameters: 5212 + A - the matrix 5213 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5214 - row, col - index sets of rows and columns to extract (or NULL) 5215 5216 Output Parameter: 5217 . A_loc - the local sequential matrix generated 5218 5219 Level: developer 5220 5221 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5222 5223 @*/ 5224 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5225 { 5226 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5227 PetscErrorCode ierr; 5228 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5229 IS isrowa,iscola; 5230 Mat *aloc; 5231 PetscBool match; 5232 5233 PetscFunctionBegin; 5234 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5235 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5236 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5237 if (!row) { 5238 start = A->rmap->rstart; end = A->rmap->rend; 5239 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5240 } else { 5241 isrowa = *row; 5242 } 5243 if (!col) { 5244 start = A->cmap->rstart; 5245 cmap = a->garray; 5246 nzA = a->A->cmap->n; 5247 nzB = a->B->cmap->n; 5248 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5249 ncols = 0; 5250 for (i=0; i<nzB; i++) { 5251 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5252 else break; 5253 } 5254 imark = i; 5255 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5256 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5257 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5258 } else { 5259 iscola = *col; 5260 } 5261 if (scall != MAT_INITIAL_MATRIX) { 5262 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5263 aloc[0] = *A_loc; 5264 } 5265 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5266 if (!col) { /* attach global id of condensed columns */ 5267 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5268 } 5269 *A_loc = aloc[0]; 5270 ierr = PetscFree(aloc);CHKERRQ(ierr); 5271 if (!row) { 5272 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5273 } 5274 if (!col) { 5275 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5276 } 5277 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5278 PetscFunctionReturn(0); 5279 } 5280 5281 /* 5282 * Destroy a mat that may be 
composed with PetscSF communication objects. 5283 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5284 * */ 5285 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5286 { 5287 PetscSF sf,osf; 5288 IS map; 5289 PetscErrorCode ierr; 5290 5291 PetscFunctionBegin; 5292 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5293 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5294 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5295 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5296 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5297 ierr = ISDestroy(&map);CHKERRQ(ierr); 5298 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5299 PetscFunctionReturn(0); 5300 } 5301 5302 /* 5303 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5304 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5305 * on a global size. 5306 * */ 5307 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5308 { 5309 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5310 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5311 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol; 5312 PetscSFNode *iremote,*oiremote; 5313 const PetscInt *lrowindices; 5314 PetscErrorCode ierr; 5315 PetscSF sf,osf; 5316 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5317 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5318 MPI_Comm comm; 5319 ISLocalToGlobalMapping mapping; 5320 5321 PetscFunctionBegin; 5322 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5323 /* plocalsize is the number of roots 5324 * nrows is the number of leaves 5325 * */ 5326 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5327 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5328 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5329 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5330 for (i=0;i<nrows;i++) { 5331 /* Find a remote index and an owner for a row 5332 * The row could be local or remote 5333 * */ 5334 owner = 0; 5335 lidx = 0; 5336 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5337 iremote[i].index = lidx; 5338 iremote[i].rank = owner; 5339 } 5340 /* Create SF to communicate how many nonzero columns for each row */ 5341 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5342 /* SF will figure out the number of nonzero colunms for each row, and their 5343 * offsets 5344 * */ 5345 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5346 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5347 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5348 5349 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5350 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5351 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5352 roffsets[0] = 0; 5353 roffsets[1] = 0; 5354 for (i=0;i<plocalsize;i++) { 5355 /* diag */ 5356 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5357 /* off diag */ 5358 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5359 /* compute offsets so that we relative location for each row */ 5360 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5361 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5362 } 5363 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5364 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5365 /* 'r' means root, and 
'l' means leaf */ 5366 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5367 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5368 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5369 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5370 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5371 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5372 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5373 dntotalcols = 0; 5374 ontotalcols = 0; 5375 ncol = 0; 5376 for (i=0;i<nrows;i++) { 5377 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5378 ncol = PetscMax(pnnz[i],ncol); 5379 /* diag */ 5380 dntotalcols += nlcols[i*2+0]; 5381 /* off diag */ 5382 ontotalcols += nlcols[i*2+1]; 5383 } 5384 /* We do not need to figure the right number of columns 5385 * since all the calculations will be done by going through the raw data 5386 * */ 5387 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5388 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5389 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5390 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5391 /* diag */ 5392 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5393 /* off diag */ 5394 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5395 /* diag */ 5396 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5397 /* off diag */ 5398 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5399 dntotalcols = 0; 5400 ontotalcols = 0; 5401 ntotalcols = 0; 5402 for (i=0;i<nrows;i++) { 5403 owner = 0; 5404 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5405 /* Set iremote for diag matrix */ 5406 for (j=0;j<nlcols[i*2+0];j++) { 5407 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5408 iremote[dntotalcols].rank = owner; 5409 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5410 ilocal[dntotalcols++] = ntotalcols++; 5411 } 5412 /* off diag */ 5413 for (j=0;j<nlcols[i*2+1];j++) { 5414 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5415 oiremote[ontotalcols].rank = owner; 5416 oilocal[ontotalcols++] = ntotalcols++; 5417 } 5418 } 5419 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5420 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5421 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5422 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5423 /* P serves as roots and P_oth is leaves 5424 * Diag matrix 5425 * */ 5426 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5427 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5428 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5429 5430 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5431 /* Off diag */ 5432 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5433 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5434 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5435 /* We operate on the matrix internal data for saving memory */ 5436 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5437 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5438 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5439 /* Convert to global indices for diag matrix */ 5440 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5441 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5442 /* We want P_oth store global indices */ 5443 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5444 /* Use memory scalable approach */ 5445 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5446 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5447 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5448 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5449 /* Convert back to local indices */ 5450 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5451 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5452 nout = 0; 5453 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5454 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5455 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5456 /* Exchange values */ 5457 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5458 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5459 /* Stop PETSc from shrinking memory */ 5460 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5461 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5462 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5463 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5464 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5465 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5466 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5467 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5468 PetscFunctionReturn(0); 5469 } 5470 5471 /* 5472 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5473 * This supports MPIAIJ and MAIJ 5474 * */ 5475 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5476 { 5477 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5478 Mat_SeqAIJ *p_oth; 5479 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5480 IS rows,map; 5481 PetscHMapI hamp; 5482 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5483 MPI_Comm comm; 5484 PetscSF sf,osf; 5485 PetscBool has; 5486 PetscErrorCode ierr; 5487 5488 PetscFunctionBegin; 5489 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5490 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5491 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5492 * and then create a submatrix (that often is an overlapping matrix) 5493 * */ 5494 if (reuse==MAT_INITIAL_MATRIX) { 5495 /* Use a hash table to figure out unique keys */ 5496 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5497 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5498 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5499 count = 0; 5500 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5501 for (i=0;i<a->B->cmap->n;i++) { 5502 key = a->garray[i]/dof; 5503 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5504 if (!has) { 5505 mapping[i] = count; 5506 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5507 } else { 5508 /* Current 'i' has the same value the previous step */ 5509 mapping[i] = count-1; 5510 } 5511 } 5512 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5513 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5514 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5515 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5516 off = 0; 5517 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5518 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5519 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5520 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5521 /* In case, the matrix was already created but users want to recreate the matrix */ 5522 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5523 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5524 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5525 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5526 } else if (reuse==MAT_REUSE_MATRIX) { 5527 /* If matrix was already created, we simply update values using SF objects 5528 * that as attached to the matrix ealier. 5529 * */ 5530 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5531 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5532 if (!sf || !osf) { 5533 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5534 } 5535 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5536 /* Update values in place */ 5537 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5538 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5539 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5540 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5541 } else { 5542 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5543 } 5544 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5545 PetscFunctionReturn(0); 5546 } 5547 5548 /*@C 5549 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5550 5551 Collective on Mat 5552 5553 Input Parameters: 5554 + A,B - the matrices in mpiaij format 5555 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5556 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5557 5558 Output Parameter: 5559 + rowb, colb - index sets of rows and columns of B to extract 5560 - B_seq - the sequential matrix generated 5561 5562 Level: developer 5563 5564 @*/ 5565 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5566 { 5567 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5568 PetscErrorCode ierr; 5569 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5570 IS isrowb,iscolb; 5571 Mat *bseq=NULL; 5572 5573 PetscFunctionBegin; 5574 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5575 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5576 } 5577 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5578 5579 if (scall == MAT_INITIAL_MATRIX) { 5580 start = A->cmap->rstart; 5581 cmap = a->garray; 5582 nzA = a->A->cmap->n; 5583 nzB = a->B->cmap->n; 5584 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5585 ncols = 0; 5586 for (i=0; i<nzB; i++) { /* row < local row index */ 5587 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5588 else break; 5589 } 5590 imark = i; 5591 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5592 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5593 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5594 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5595 } else { 5596 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5597 isrowb = *rowb; iscolb = *colb; 5598 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5599 bseq[0] = *B_seq; 5600 } 5601 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5602 *B_seq = bseq[0]; 5603 ierr = PetscFree(bseq);CHKERRQ(ierr); 5604 if (!rowb) { 5605 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5606 } else { 5607 *rowb = isrowb; 5608 } 5609 if (!colb) { 5610 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5611 } else { 5612 *colb = iscolb; 5613 } 5614 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5615 PetscFunctionReturn(0); 5616 } 5617 5618 /* 5619 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5620 of the OFF-DIAGONAL portion of local A 5621 5622 Collective on Mat 5623 5624 Input Parameters: 5625 + A,B - the matrices in mpiaij format 5626 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5627 5628 Output Parameter: 5629 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5630 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5631 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5632 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5633 5634 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5635 for this matrix. This is not desirable.. 
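    An illustrative sketch of the intended reuse pattern (the variable names below are placeholders, not part
    of the interface): the first call with MAT_INITIAL_MATRIX creates B_oth and saves the communication
    metadata, later calls with MAT_REUSE_MATRIX only refresh the numerical values.

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;
      Mat        B_oth     = NULL;

      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
         ... the numerical values of B change, its nonzero pattern does not ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);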
5636 5637 Level: developer 5638 5639 */ 5640 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5641 { 5642 PetscErrorCode ierr; 5643 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5644 Mat_SeqAIJ *b_oth; 5645 VecScatter ctx; 5646 MPI_Comm comm; 5647 const PetscMPIInt *rprocs,*sprocs; 5648 const PetscInt *srow,*rstarts,*sstarts; 5649 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5650 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5651 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5652 MPI_Request *rwaits = NULL,*swaits = NULL; 5653 MPI_Status rstatus; 5654 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5655 5656 PetscFunctionBegin; 5657 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5658 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5659 5660 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5661 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5662 } 5663 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5664 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5665 5666 if (size == 1) { 5667 startsj_s = NULL; 5668 bufa_ptr = NULL; 5669 *B_oth = NULL; 5670 PetscFunctionReturn(0); 5671 } 5672 5673 ctx = a->Mvctx; 5674 tag = ((PetscObject)ctx)->tag; 5675 5676 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5677 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5678 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5679 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5680 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5681 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5682 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5683 5684 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5685 if (scall == MAT_INITIAL_MATRIX) { 5686 /* i-array */ 5687 /*---------*/ 5688 /* post receives */ 5689 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5690 for (i=0; i<nrecvs; i++) { 5691 rowlen = rvalues + rstarts[i]*rbs; 5692 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5693 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5694 } 5695 5696 /* pack the outgoing message */ 5697 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5698 5699 sstartsj[0] = 0; 5700 rstartsj[0] = 0; 5701 len = 0; /* total length of j or a array to be sent */ 5702 if (nsends) { 5703 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5704 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5705 } 5706 for (i=0; i<nsends; i++) { 5707 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5708 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5709 for (j=0; j<nrows; j++) { 5710 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5711 for (l=0; l<sbs; l++) { 5712 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5713 5714 rowlen[j*sbs+l] = ncols; 5715 5716 len += ncols; 5717 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5718 } 5719 k++; 5720 } 5721 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5722 5723 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5724 } 5725 /* recvs and sends of i-array are completed */ 5726 i = nrecvs; 5727 while (i--) { 5728 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5729 } 5730 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5731 ierr = PetscFree(svalues);CHKERRQ(ierr); 5732 5733 /* allocate buffers for sending j and a arrays */ 5734 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5735 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5736 5737 /* create i-array of B_oth */ 5738 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5739 5740 b_othi[0] = 0; 5741 len = 0; /* total length of j or a array to be received */ 5742 k = 0; 5743 for (i=0; i<nrecvs; i++) { 5744 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5745 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5746 for (j=0; j<nrows; j++) { 5747 b_othi[k+1] = b_othi[k] + rowlen[j]; 5748 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5749 k++; 5750 } 5751 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5752 } 5753 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5754 5755 /* allocate space for j and a arrrays of B_oth */ 5756 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5757 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5758 5759 /* j-array */ 5760 /*---------*/ 5761 /* post receives of j-array */ 5762 for (i=0; i<nrecvs; i++) { 5763 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5764 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5765 } 5766 5767 /* pack the outgoing message j-array */ 5768 if (nsends) k = sstarts[0]; 5769 for (i=0; i<nsends; i++) { 5770 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5771 bufJ = bufj+sstartsj[i]; 5772 for (j=0; j<nrows; j++) { 5773 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5774 for (ll=0; ll<sbs; ll++) { 5775 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5776 for (l=0; l<ncols; l++) { 5777 *bufJ++ = cols[l]; 5778 } 5779 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5780 } 5781 } 5782 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5783 } 5784 5785 /* recvs and sends of j-array are completed */ 5786 i = nrecvs; 5787 while (i--) { 5788 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5789 } 5790 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5791 } else if (scall == MAT_REUSE_MATRIX) { 5792 sstartsj = *startsj_s; 5793 rstartsj = *startsj_r; 5794 bufa = *bufa_ptr; 5795 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5796 b_otha = b_oth->a; 5797 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5798 5799 /* a-array */ 5800 /*---------*/ 5801 /* post receives of a-array */ 5802 for (i=0; i<nrecvs; i++) { 5803 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5804 ierr = 
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* the values buffer is not returned to the caller, so release it here */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
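
/*
   A minimal usage sketch (not part of the library; variable names are illustrative): a caller such as a
   matrix-product kernel typically builds B_oth once with MAT_INITIAL_MATRIX, keeps the startsj/bufa
   buffers, and then refreshes only the numerical values with MAT_REUSE_MATRIX when B changes values but
   keeps its nonzero pattern:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...use B_oth...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/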

/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec               = a->lvec;
  if (colmap) *colmap           = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
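
/*
   A minimal sketch of using the accessor above (illustrative only).  The returned objects are owned by
   the matrix and must not be destroyed by the caller:

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/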

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/*
    Computes (B'*A')' since computing A*B directly is untenable

           n                 p                  p
      (         )       (         )       (          )
    m (    A    )  *  n (    B    )  =  m (    C     )
      (         )       (         )       (          )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
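
/*
   A small illustrative sketch (assumptions: A is MATMPIDENSE and B is MATMPIAIJ with compatible layouts).
   A user-level MatMatMult() call dispatches to the routines above and returns a dense C:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...later, after the values of A or B change but not their layouts...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/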

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.

    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
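
/*
   A minimal creation sketch (illustrative; the global sizes M, N and the preallocation counts are
   placeholders chosen for the example):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ...MatSetValues()...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/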

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
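
/*
   A minimal calling sketch (illustrative only): i/j/a hold this process's "diagonal" block in CSR form
   with column indices local to the process, oi/oj/oa hold the "off-diagonal" block with global column
   indices, and all six arrays must remain valid until the matrix is destroyed:

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/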
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}