#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to using inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
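
/*
   For orientation, a minimal usage sketch of the pattern recommended above (not part of this
   file's implementation; the sizes and per-row nonzero counts are placeholders chosen only
   for illustration):

     Mat      C;
     PetscInt mlocal = 10,nlocal = 10;                               // illustrative local sizes

     ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(C,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(C,5,NULL);CHKERRQ(ierr);        // takes effect on one process
     ierr = MatMPIAIJSetPreallocation(C,5,NULL,2,NULL);CHKERRQ(ierr); // takes effect on multiple processes
*/
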
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
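
/*
   A rough usage sketch for MatDistribute_MPIAIJ() (illustrative only; gseq and mlocal are
   hypothetical caller-provided variables): gseq is a square MATSEQAIJ matrix significant on
   rank 0 of the communicator, mlocal is the number of rows this process is to own, and a
   second call with MAT_REUSE_MATRIX refreshes only the numerical values.

     Mat      gseq,Adist = NULL;
     PetscInt mlocal;

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
     ... gseq's values change on rank 0, but not its nonzero pattern ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/
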
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
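
/*
   For reference, a sketch of how the colmap built above is consulted later in this file (see
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()), where gcol stands for a global column
   index; the +1/-1 shift is used so that a stored value of 0 can mean "column not present
   in the off-diagonal part":

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;                            // local off-diagonal column, or negative if absent
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/
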
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{                                                                        \
    if (col <= lastcol1)  low1 = 0;                                      \
    else                 high1 = nrow1;                                  \
    lastcol1 = col;                                                      \
    while (high1-low1 > 5) {                                             \
      t = (low1+high1)/2;                                                \
      if (rp1[t] > col) high1 = t;                                       \
      else              low1  = t;                                       \
    }                                                                    \
    for (_i=low1; _i<high1; _i++) {                                      \
      if (rp1[_i] > col) break;                                          \
      if (rp1[_i] == col) {                                              \
        if (addv == ADD_VALUES) {                                        \
          ap1[_i] += value;                                              \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0);                                      \
        }                                                                \
        else ap1[_i] = value;                                            \
        inserted = PETSC_TRUE;                                           \
        goto a_noinsert;                                                 \
      }                                                                  \
    }                                                                    \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}          \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++;                                   \
    /* shift up all the later entries in this row */                     \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);         \
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);         \
    rp1[_i] = col;                                                       \
    ap1[_i] = value;                                                     \
    A->nonzerostate++;                                                   \
    a_noinsert: ;                                                        \
    ailen[row] = nrow1;                                                  \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol)     \
{                                                                        \
    if (col <= lastcol2) low2 = 0;                                       \
    else high2 = nrow2;                                                  \
    lastcol2 = col;                                                      \
    while (high2-low2 > 5) {                                             \
      t = (low2+high2)/2;                                                \
      if (rp2[t] > col) high2 = t;                                       \
      else              low2  = t;                                       \
    }                                                                    \
    for (_i=low2; _i<high2; _i++) {                                      \
      if (rp2[_i] > col) break;                                          \
      if (rp2[_i] == col) {                                              \
        if (addv == ADD_VALUES) {                                        \
          ap2[_i] += value;                                              \
          (void)PetscLogFlops(1.0);                                      \
        }                                                                \
        else ap2[_i] = value;                                            \
        inserted = PETSC_TRUE;                                           \
        goto b_noinsert;                                                 \
      }                                                                  \
    }                                                                    \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}          \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                                   \
    /* shift up all the later entries in this row */                     \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);         \
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);         \
    rp2[_i] = col;                                                       \
    ap2[_i] = value;                                                     \
    B->nonzerostate++;                                                   \
    b_noinsert: ;                                                        \
    bilen[row] = nrow2;                                                  \
}

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A = aij->A; /* diagonal part of the matrix */
  Mat        B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat         A = aij->A; /* diagonal part of the matrix */
  Mat         B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}
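
/*
   To illustrate the CSR input expected by the two CopyFromCSRFormat routines above, a small
   hypothetical example for a process owning two rows (columns are global, sorted within each
   row, and all rows are locally owned):

     row 0 has global columns 0 and 5, row 1 has global columns 1, 3 and 7
     mat_i[] = {0, 2, 5}
     mat_j[] = {0, 5, 1, 3, 7}
     mat_a[] = {a00, a05, a11, a13, a17}   (used only by the numeric routine)

   Entries whose global column lies in [cstart,cend) land in the diagonal block A (with cstart
   subtracted from the column); all other entries land in the off-diagonal block B.
*/
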
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me, Notme;
  PetscErrorCode ierr;
  PetscInt       M, N, first, last, *notme, i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt       *rowlens;
  PetscInt       *colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M = mat->rmap->N;
  N = mat->cmap->N;
  m = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = B->a[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = A->a[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = B->a[jb];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1470 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1471 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1472 /* The commented code uses MatCreateSubMatrices instead */ 1473 /* 1474 Mat *AA, A = NULL, Av; 1475 IS isrow,iscol; 1476 1477 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1478 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1479 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1480 if (!rank) { 1481 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1482 A = AA[0]; 1483 Av = AA[0]; 1484 } 1485 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1486 */ 1487 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1488 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1489 /* 1490 Everyone has to call to draw the matrix since the graphics waits are 1491 synchronized across all processors that share the PetscDraw object 1492 */ 1493 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1494 if (!rank) { 1495 if (((PetscObject)mat)->name) { 1496 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 } 1498 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1499 } 1500 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1501 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1502 ierr = MatDestroy(&A);CHKERRQ(ierr); 1503 } 1504 PetscFunctionReturn(0); 1505 } 1506 1507 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1508 { 1509 PetscErrorCode ierr; 1510 PetscBool iascii,isdraw,issocket,isbinary; 1511 1512 PetscFunctionBegin; 1513 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1514 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1515 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1516 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1517 if (iascii || isdraw || isbinary || issocket) { 1518 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1519 } 1520 PetscFunctionReturn(0); 1521 } 1522 1523 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1524 { 1525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1526 PetscErrorCode ierr; 1527 Vec bb1 = 0; 1528 PetscBool hasop; 1529 1530 PetscFunctionBegin; 1531 if (flag == SOR_APPLY_UPPER) { 1532 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1533 PetscFunctionReturn(0); 1534 } 1535 1536 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1537 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1538 } 1539 1540 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1541 if (flag & SOR_ZERO_INITIAL_GUESS) { 1542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1543 its--; 1544 } 1545 1546 while (its--) { 1547 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 1550 /* update rhs: bb1 = bb - B*x */ 1551 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1552 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1553 1554 
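      /* here bb1 = bb - B*x_ghost: the coupling to off-process unknowns (gathered
         into mat->lvec by the scatter above) has been folded into the right-hand
         side, so the sweep below acts only on the local diagonal block mat->A */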
/* local sweep */ 1555 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1556 } 1557 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1558 if (flag & SOR_ZERO_INITIAL_GUESS) { 1559 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1560 its--; 1561 } 1562 while (its--) { 1563 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1565 1566 /* update rhs: bb1 = bb - B*x */ 1567 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1568 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1569 1570 /* local sweep */ 1571 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1572 } 1573 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1574 if (flag & SOR_ZERO_INITIAL_GUESS) { 1575 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1576 its--; 1577 } 1578 while (its--) { 1579 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1581 1582 /* update rhs: bb1 = bb - B*x */ 1583 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1584 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1585 1586 /* local sweep */ 1587 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1588 } 1589 } else if (flag & SOR_EISENSTAT) { 1590 Vec xx1; 1591 1592 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1593 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1594 1595 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1597 if (!mat->diag) { 1598 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1599 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1600 } 1601 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1602 if (hasop) { 1603 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1604 } else { 1605 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1606 } 1607 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1608 1609 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1610 1611 /* local sweep */ 1612 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1613 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1614 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1615 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1616 1617 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1618 1619 matin->factorerrortype = mat->A->factorerrortype; 1620 PetscFunctionReturn(0); 1621 } 1622 1623 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1624 { 1625 Mat aA,aB,Aperm; 1626 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1627 PetscScalar *aa,*ba; 1628 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1629 PetscSF rowsf,sf; 1630 IS parcolp = NULL; 1631 PetscBool done; 1632 PetscErrorCode ierr; 1633 1634 PetscFunctionBegin; 1635 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1636 ierr = 
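  /* Overall strategy of MatPermute_MPIAIJ: use PetscSF reductions to invert the
     row and column permutations (i.e. find the destination index of every locally
     owned row and column), preallocate the permuted matrix from the resulting
     diagonal/off-diagonal counts, then insert the old entries at their permuted
     locations with MatSetValues(). */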
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1637 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1638 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1639 1640 /* Invert row permutation to find out where my rows should go */ 1641 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1642 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1643 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1644 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1645 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1646 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1647 1648 /* Invert column permutation to find out where my columns should go */ 1649 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1650 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1651 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1652 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1653 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1654 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1655 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1656 1657 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1658 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1659 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1660 1661 /* Find out where my gcols should go */ 1662 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1663 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1664 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1665 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1666 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1667 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1669 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1670 1671 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1672 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1673 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1674 for (i=0; i<m; i++) { 1675 PetscInt row = rdest[i]; 1676 PetscMPIInt rowner; 1677 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1678 for (j=ai[i]; j<ai[i+1]; j++) { 1679 PetscInt col = cdest[aj[j]]; 1680 PetscMPIInt cowner; 1681 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1682 if (rowner == cowner) dnnz[i]++; 1683 else onnz[i]++; 1684 } 1685 for (j=bi[i]; j<bi[i+1]; j++) { 1686 PetscInt col = gcdest[bj[j]]; 1687 PetscMPIInt cowner; 1688 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1689 if (rowner == cowner) dnnz[i]++; 1690 else onnz[i]++; 1691 } 1692 } 1693 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1694 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1695 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1696 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1697 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1698 1699 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1700 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1701 ierr = 
MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1702 for (i=0; i<m; i++) { 1703 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1704 PetscInt j0,rowlen; 1705 rowlen = ai[i+1] - ai[i]; 1706 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1707 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1708 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1709 } 1710 rowlen = bi[i+1] - bi[i]; 1711 for (j0=j=0; j<rowlen; j0=j) { 1712 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1713 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1714 } 1715 } 1716 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1717 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1718 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1719 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1720 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1721 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1722 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1723 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1724 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1725 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1726 *B = Aperm; 1727 PetscFunctionReturn(0); 1728 } 1729 1730 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1731 { 1732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1733 PetscErrorCode ierr; 1734 1735 PetscFunctionBegin; 1736 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1737 if (ghosts) *ghosts = aij->garray; 1738 PetscFunctionReturn(0); 1739 } 1740 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = mat->B; 1745 PetscErrorCode ierr; 1746 PetscLogDouble isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 1752 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1753 isend[3] = info->memory; isend[4] = info->mallocs; 1754 1755 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1756 1757 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1758 isend[3] += info->memory; isend[4] += info->mallocs; 1759 if (flag == MAT_LOCAL) { 1760 info->nz_used = isend[0]; 1761 info->nz_allocated = isend[1]; 1762 info->nz_unneeded = isend[2]; 1763 info->memory = isend[3]; 1764 info->mallocs = isend[4]; 1765 } else if (flag == MAT_GLOBAL_MAX) { 1766 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1767 1768 info->nz_used = irecv[0]; 1769 info->nz_allocated = irecv[1]; 1770 info->nz_unneeded = irecv[2]; 1771 info->memory = irecv[3]; 1772 info->mallocs = irecv[4]; 1773 } else if (flag == MAT_GLOBAL_SUM) { 1774 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1775 1776 info->nz_used = irecv[0]; 1777 info->nz_allocated = irecv[1]; 1778 info->nz_unneeded = irecv[2]; 1779 info->memory = irecv[3]; 1780 info->mallocs = irecv[4]; 1781 } 1782 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1783 info->fill_ratio_needed = 0; 1784 info->factor_mallocs = 0; 1785 PetscFunctionReturn(0); 1786 } 
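/*
   Illustrative caller-side sketch (not part of this file): the routine above is what
   backs MatGetInfo() for MATMPIAIJ, summing or reducing the MatInfo fields of the
   local diagonal (A) and off-diagonal (B) blocks.  Assuming mat is an assembled Mat
   and ierr a PetscErrorCode, the global counts can be queried with

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);

   MAT_GLOBAL_MAX instead reduces with a maximum over the processes, and MAT_LOCAL
   returns the counts for the calling process only.
*/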
1787 1788 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1789 { 1790 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1791 PetscErrorCode ierr; 1792 1793 PetscFunctionBegin; 1794 switch (op) { 1795 case MAT_NEW_NONZERO_LOCATIONS: 1796 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1797 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1798 case MAT_KEEP_NONZERO_PATTERN: 1799 case MAT_NEW_NONZERO_LOCATION_ERR: 1800 case MAT_USE_INODES: 1801 case MAT_IGNORE_ZERO_ENTRIES: 1802 MatCheckPreallocated(A,1); 1803 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1804 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1805 break; 1806 case MAT_ROW_ORIENTED: 1807 MatCheckPreallocated(A,1); 1808 a->roworiented = flg; 1809 1810 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1811 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1812 break; 1813 case MAT_NEW_DIAGONALS: 1814 case MAT_SORTED_FULL: 1815 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1816 break; 1817 case MAT_IGNORE_OFF_PROC_ENTRIES: 1818 a->donotstash = flg; 1819 break; 1820 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1821 case MAT_SPD: 1822 case MAT_SYMMETRIC: 1823 case MAT_STRUCTURALLY_SYMMETRIC: 1824 case MAT_HERMITIAN: 1825 case MAT_SYMMETRY_ETERNAL: 1826 break; 1827 case MAT_SUBMAT_SINGLEIS: 1828 A->submat_singleis = flg; 1829 break; 1830 case MAT_STRUCTURE_ONLY: 1831 /* The option is handled directly by MatSetOption() */ 1832 break; 1833 default: 1834 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1835 } 1836 PetscFunctionReturn(0); 1837 } 1838 1839 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1840 { 1841 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1842 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1843 PetscErrorCode ierr; 1844 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1845 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1846 PetscInt *cmap,*idx_p; 1847 1848 PetscFunctionBegin; 1849 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1850 mat->getrowactive = PETSC_TRUE; 1851 1852 if (!mat->rowvalues && (idx || v)) { 1853 /* 1854 allocate enough space to hold information from the longest row. 
1855 */ 1856 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1857 PetscInt max = 1,tmp; 1858 for (i=0; i<matin->rmap->n; i++) { 1859 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1860 if (max < tmp) max = tmp; 1861 } 1862 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1863 } 1864 1865 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1866 lrow = row - rstart; 1867 1868 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1869 if (!v) {pvA = 0; pvB = 0;} 1870 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1871 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1872 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1873 nztot = nzA + nzB; 1874 1875 cmap = mat->garray; 1876 if (v || idx) { 1877 if (nztot) { 1878 /* Sort by increasing column numbers, assuming A and B already sorted */ 1879 PetscInt imark = -1; 1880 if (v) { 1881 *v = v_p = mat->rowvalues; 1882 for (i=0; i<nzB; i++) { 1883 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1884 else break; 1885 } 1886 imark = i; 1887 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1888 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1889 } 1890 if (idx) { 1891 *idx = idx_p = mat->rowindices; 1892 if (imark > -1) { 1893 for (i=0; i<imark; i++) { 1894 idx_p[i] = cmap[cworkB[i]]; 1895 } 1896 } else { 1897 for (i=0; i<nzB; i++) { 1898 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1899 else break; 1900 } 1901 imark = i; 1902 } 1903 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1904 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1905 } 1906 } else { 1907 if (idx) *idx = 0; 1908 if (v) *v = 0; 1909 } 1910 } 1911 *nz = nztot; 1912 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1913 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1914 PetscFunctionReturn(0); 1915 } 1916 1917 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1918 { 1919 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1920 1921 PetscFunctionBegin; 1922 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1923 aij->getrowactive = PETSC_FALSE; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1928 { 1929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1930 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1931 PetscErrorCode ierr; 1932 PetscInt i,j,cstart = mat->cmap->rstart; 1933 PetscReal sum = 0.0; 1934 MatScalar *v; 1935 1936 PetscFunctionBegin; 1937 if (aij->size == 1) { 1938 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1939 } else { 1940 if (type == NORM_FROBENIUS) { 1941 v = amat->a; 1942 for (i=0; i<amat->nz; i++) { 1943 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1944 } 1945 v = bmat->a; 1946 for (i=0; i<bmat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1950 *norm = PetscSqrtReal(*norm); 1951 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1952 } else if (type == NORM_1) { /* max column norm */ 1953 PetscReal *tmp,*tmp2; 1954 PetscInt *jj,*garray = aij->garray; 1955 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1956 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1957 *norm = 0.0; 
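      /* NORM_1 is the maximum column sum, ||A||_1 = max_j sum_i |a_ij|: accumulate
         |a_ij| per global column (diagonal block shifted by cstart, off-diagonal
         block mapped through garray), MPI-sum the partial column sums, then take
         the largest entry */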
1958 v = amat->a; jj = amat->j; 1959 for (j=0; j<amat->nz; j++) { 1960 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1961 } 1962 v = bmat->a; jj = bmat->j; 1963 for (j=0; j<bmat->nz; j++) { 1964 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1965 } 1966 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1967 for (j=0; j<mat->cmap->N; j++) { 1968 if (tmp2[j] > *norm) *norm = tmp2[j]; 1969 } 1970 ierr = PetscFree(tmp);CHKERRQ(ierr); 1971 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1972 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1973 } else if (type == NORM_INFINITY) { /* max row norm */ 1974 PetscReal ntemp = 0.0; 1975 for (j=0; j<aij->A->rmap->n; j++) { 1976 v = amat->a + amat->i[j]; 1977 sum = 0.0; 1978 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1979 sum += PetscAbsScalar(*v); v++; 1980 } 1981 v = bmat->a + bmat->i[j]; 1982 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 if (sum > ntemp) ntemp = sum; 1986 } 1987 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1988 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1989 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1990 } 1991 PetscFunctionReturn(0); 1992 } 1993 1994 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1995 { 1996 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1997 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1998 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1999 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2000 PetscErrorCode ierr; 2001 Mat B,A_diag,*B_diag; 2002 const MatScalar *array; 2003 2004 PetscFunctionBegin; 2005 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2006 ai = Aloc->i; aj = Aloc->j; 2007 bi = Bloc->i; bj = Bloc->j; 2008 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2009 PetscInt *d_nnz,*g_nnz,*o_nnz; 2010 PetscSFNode *oloc; 2011 PETSC_UNUSED PetscSF sf; 2012 2013 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2014 /* compute d_nnz for preallocation */ 2015 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2016 for (i=0; i<ai[ma]; i++) { 2017 d_nnz[aj[i]]++; 2018 } 2019 /* compute local off-diagonal contributions */ 2020 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2021 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2022 /* map those to global */ 2023 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2024 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2025 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2026 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2027 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2028 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2029 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2030 2031 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2032 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2033 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2034 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2035 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2036 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2037 } else { 2038 B = *matout; 2039 ierr = 
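    /* when an existing transpose is reused it must already have the transposed
       nonzero pattern, so treat any new allocation as an error */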
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2040 } 2041 2042 b = (Mat_MPIAIJ*)B->data; 2043 A_diag = a->A; 2044 B_diag = &b->A; 2045 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2046 A_diag_ncol = A_diag->cmap->N; 2047 B_diag_ilen = sub_B_diag->ilen; 2048 B_diag_i = sub_B_diag->i; 2049 2050 /* Set ilen for diagonal of B */ 2051 for (i=0; i<A_diag_ncol; i++) { 2052 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2053 } 2054 2055 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2056 very quickly (=without using MatSetValues), because all writes are local. */ 2057 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2058 2059 /* copy over the B part */ 2060 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2061 array = Bloc->a; 2062 row = A->rmap->rstart; 2063 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2064 cols_tmp = cols; 2065 for (i=0; i<mb; i++) { 2066 ncol = bi[i+1]-bi[i]; 2067 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2068 row++; 2069 array += ncol; cols_tmp += ncol; 2070 } 2071 ierr = PetscFree(cols);CHKERRQ(ierr); 2072 2073 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2074 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2076 *matout = B; 2077 } else { 2078 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2084 { 2085 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2086 Mat a = aij->A,b = aij->B; 2087 PetscErrorCode ierr; 2088 PetscInt s1,s2,s3; 2089 2090 PetscFunctionBegin; 2091 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2092 if (rr) { 2093 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2094 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2095 /* Overlap communication with computation. 
*/ 2096 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2097 } 2098 if (ll) { 2099 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2100 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2101 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2102 } 2103 /* scale the diagonal block */ 2104 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2105 2106 if (rr) { 2107 /* Do a scatter end and then right scale the off-diagonal block */ 2108 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2109 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2110 } 2111 PetscFunctionReturn(0); 2112 } 2113 2114 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2115 { 2116 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2117 PetscErrorCode ierr; 2118 2119 PetscFunctionBegin; 2120 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2125 { 2126 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2127 Mat a,b,c,d; 2128 PetscBool flg; 2129 PetscErrorCode ierr; 2130 2131 PetscFunctionBegin; 2132 a = matA->A; b = matA->B; 2133 c = matB->A; d = matB->B; 2134 2135 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2136 if (flg) { 2137 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2138 } 2139 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2144 { 2145 PetscErrorCode ierr; 2146 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2147 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2148 2149 PetscFunctionBegin; 2150 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2151 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2152 /* because of the column compression in the off-processor part of the matrix a->B, 2153 the number of columns in a->B and b->B may be different, hence we cannot call 2154 the MatCopy() directly on the two parts. If need be, we can provide a more 2155 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2156 then copying the submatrices */ 2157 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2158 } else { 2159 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2160 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2161 } 2162 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2167 { 2168 PetscErrorCode ierr; 2169 2170 PetscFunctionBegin; 2171 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2172 PetscFunctionReturn(0); 2173 } 2174 2175 /* 2176 Computes the number of nonzeros per row needed for preallocation when X and Y 2177 have different nonzero structure. 
2178 */ 2179 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2180 { 2181 PetscInt i,j,k,nzx,nzy; 2182 2183 PetscFunctionBegin; 2184 /* Set the number of nonzeros in the new matrix */ 2185 for (i=0; i<m; i++) { 2186 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2187 nzx = xi[i+1] - xi[i]; 2188 nzy = yi[i+1] - yi[i]; 2189 nnz[i] = 0; 2190 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2191 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2192 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2193 nnz[i]++; 2194 } 2195 for (; k<nzy; k++) nnz[i]++; 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2201 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2202 { 2203 PetscErrorCode ierr; 2204 PetscInt m = Y->rmap->N; 2205 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2206 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2207 2208 PetscFunctionBegin; 2209 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2214 { 2215 PetscErrorCode ierr; 2216 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2217 PetscBLASInt bnz,one=1; 2218 Mat_SeqAIJ *x,*y; 2219 2220 PetscFunctionBegin; 2221 if (str == SAME_NONZERO_PATTERN) { 2222 PetscScalar alpha = a; 2223 x = (Mat_SeqAIJ*)xx->A->data; 2224 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2225 y = (Mat_SeqAIJ*)yy->A->data; 2226 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2227 x = (Mat_SeqAIJ*)xx->B->data; 2228 y = (Mat_SeqAIJ*)yy->B->data; 2229 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2232 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2233 will be updated */ 2234 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2235 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2236 Y->offloadmask = PETSC_OFFLOAD_CPU; 2237 } 2238 #endif 2239 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2240 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2241 } else { 2242 Mat B; 2243 PetscInt *nnz_d,*nnz_o; 2244 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2245 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2246 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2247 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2248 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2249 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* v has local length A->rmap->n, so loop over the locally owned rows */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are sequential scratch vectors, so they live on PETSC_COMM_SELF */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
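       /* the (point-)block diagonal is contained entirely in the local diagonal
          block a->A, so no communication is needed; the factorization error type
          is propagated to the parallel matrix just below */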
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2474 A->factorerrortype = a->A->factorerrortype; 2475 PetscFunctionReturn(0); 2476 } 2477 2478 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2479 { 2480 PetscErrorCode ierr; 2481 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2482 2483 PetscFunctionBegin; 2484 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2485 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2486 if (x->assembled) { 2487 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2488 } else { 2489 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2490 } 2491 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2492 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2497 { 2498 PetscFunctionBegin; 2499 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2500 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2501 PetscFunctionReturn(0); 2502 } 2503 2504 /*@ 2505 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2506 2507 Collective on Mat 2508 2509 Input Parameters: 2510 + A - the matrix 2511 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2512 2513 Level: advanced 2514 2515 @*/ 2516 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2517 { 2518 PetscErrorCode ierr; 2519 2520 PetscFunctionBegin; 2521 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2526 { 2527 PetscErrorCode ierr; 2528 PetscBool sc = PETSC_FALSE,flg; 2529 2530 PetscFunctionBegin; 2531 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2532 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2533 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2534 if (flg) { 2535 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2536 } 2537 ierr = PetscOptionsTail();CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 2541 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2542 { 2543 PetscErrorCode ierr; 2544 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2545 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2546 2547 PetscFunctionBegin; 2548 if (!Y->preallocated) { 2549 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2550 } else if (!aij->nz) { 2551 PetscInt nonew = aij->nonew; 2552 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2553 aij->nonew = nonew; 2554 } 2555 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2556 PetscFunctionReturn(0); 2557 } 2558 2559 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2560 { 2561 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2562 PetscErrorCode ierr; 2563 2564 PetscFunctionBegin; 2565 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2566 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2567 if (d) { 2568 PetscInt rstart; 
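    /* the SeqAIJ routine returns a local row index; shift it by the start of this
       process's ownership range so *d reports a global row number */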
2569 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2570 *d += rstart; 2571 2572 } 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2577 { 2578 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2579 PetscErrorCode ierr; 2580 2581 PetscFunctionBegin; 2582 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 /* -------------------------------------------------------------------*/ 2587 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2588 MatGetRow_MPIAIJ, 2589 MatRestoreRow_MPIAIJ, 2590 MatMult_MPIAIJ, 2591 /* 4*/ MatMultAdd_MPIAIJ, 2592 MatMultTranspose_MPIAIJ, 2593 MatMultTransposeAdd_MPIAIJ, 2594 0, 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 0, 2614 0, 2615 0, 2616 /*29*/ MatSetUp_MPIAIJ, 2617 0, 2618 0, 2619 MatGetDiagonalBlock_MPIAIJ, 2620 0, 2621 /*34*/ MatDuplicate_MPIAIJ, 2622 0, 2623 0, 2624 0, 2625 0, 2626 /*39*/ MatAXPY_MPIAIJ, 2627 MatCreateSubMatrices_MPIAIJ, 2628 MatIncreaseOverlap_MPIAIJ, 2629 MatGetValues_MPIAIJ, 2630 MatCopy_MPIAIJ, 2631 /*44*/ MatGetRowMax_MPIAIJ, 2632 MatScale_MPIAIJ, 2633 MatShift_MPIAIJ, 2634 MatDiagonalSet_MPIAIJ, 2635 MatZeroRowsColumns_MPIAIJ, 2636 /*49*/ MatSetRandom_MPIAIJ, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /*54*/ MatFDColoringCreate_MPIXAIJ, 2642 0, 2643 MatSetUnfactored_MPIAIJ, 2644 MatPermute_MPIAIJ, 2645 0, 2646 /*59*/ MatCreateSubMatrix_MPIAIJ, 2647 MatDestroy_MPIAIJ, 2648 MatView_MPIAIJ, 2649 0, 2650 0, 2651 /*64*/ 0, 2652 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2653 0, 2654 0, 2655 0, 2656 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2657 MatGetRowMinAbs_MPIAIJ, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*75*/ MatFDColoringApply_AIJ, 2663 MatSetFromOptions_MPIAIJ, 2664 0, 2665 0, 2666 MatFindZeroDiagonals_MPIAIJ, 2667 /*80*/ 0, 2668 0, 2669 0, 2670 /*83*/ MatLoad_MPIAIJ, 2671 MatIsSymmetric_MPIAIJ, 2672 0, 2673 0, 2674 0, 2675 0, 2676 /*89*/ 0, 2677 0, 2678 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2679 0, 2680 0, 2681 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2682 0, 2683 0, 2684 0, 2685 MatBindToCPU_MPIAIJ, 2686 /*99*/ MatProductSetFromOptions_MPIAIJ, 2687 0, 2688 0, 2689 MatConjugate_MPIAIJ, 2690 0, 2691 /*104*/MatSetValuesRow_MPIAIJ, 2692 MatRealPart_MPIAIJ, 2693 MatImaginaryPart_MPIAIJ, 2694 0, 2695 0, 2696 /*109*/0, 2697 0, 2698 MatGetRowMin_MPIAIJ, 2699 0, 2700 MatMissingDiagonal_MPIAIJ, 2701 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2702 0, 2703 MatGetGhosts_MPIAIJ, 2704 0, 2705 0, 2706 /*119*/0, 2707 0, 2708 0, 2709 0, 2710 MatGetMultiProcBlock_MPIAIJ, 2711 /*124*/MatFindNonzeroRows_MPIAIJ, 2712 MatGetColumnNorms_MPIAIJ, 2713 MatInvertBlockDiagonal_MPIAIJ, 2714 MatInvertVariableBlockDiagonal_MPIAIJ, 2715 MatCreateSubMatricesMPI_MPIAIJ, 2716 /*129*/0, 2717 0, 2718 0, 2719 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2720 0, 2721 /*134*/0, 2722 0, 2723 0, 2724 0, 2725 0, 2726 /*139*/MatSetBlockSizes_MPIAIJ, 2727 0, 2728 0, 2729 MatFDColoringSetUp_MPIXAIJ, 2730 MatFindOffBlockDiagonalEntries_MPIAIJ, 2731 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2732 /*145*/0, 2733 0, 2734 0 2735 }; 2736 2737 /* 
----------------------------------------------------------------------------------------*/ 2738 2739 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2740 { 2741 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2746 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2751 { 2752 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2753 PetscErrorCode ierr; 2754 2755 PetscFunctionBegin; 2756 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2757 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2758 PetscFunctionReturn(0); 2759 } 2760 2761 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2762 { 2763 Mat_MPIAIJ *b; 2764 PetscErrorCode ierr; 2765 PetscMPIInt size; 2766 2767 PetscFunctionBegin; 2768 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2769 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2770 b = (Mat_MPIAIJ*)B->data; 2771 2772 #if defined(PETSC_USE_CTABLE) 2773 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2774 #else 2775 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2776 #endif 2777 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2778 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2779 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2780 2781 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2782 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2783 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2784 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2785 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2786 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2787 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2789 2790 if (!B->preallocated) { 2791 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2792 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2793 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2794 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2795 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2796 } 2797 2798 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2799 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2800 B->preallocated = PETSC_TRUE; 2801 B->was_assembled = PETSC_FALSE; 2802 B->assembled = PETSC_FALSE; 2803 PetscFunctionReturn(0); 2804 } 2805 2806 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2807 { 2808 Mat_MPIAIJ *b; 2809 PetscErrorCode ierr; 2810 2811 PetscFunctionBegin; 2812 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2813 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2814 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2815 b = (Mat_MPIAIJ*)B->data; 2816 2817 #if defined(PETSC_USE_CTABLE) 2818 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2819 #else 2820 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2821 #endif 2822 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2823 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2824 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2825 2826 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2827 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2828 B->preallocated = PETSC_TRUE; 2829 B->was_assembled = PETSC_FALSE; 2830 B->assembled = PETSC_FALSE; 2831 PetscFunctionReturn(0); 2832 } 2833 2834 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2835 { 2836 Mat mat; 2837 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2838 PetscErrorCode ierr; 2839 2840 PetscFunctionBegin; 2841 *newmat = 0; 2842 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2843 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2844 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2845 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2846 a = (Mat_MPIAIJ*)mat->data; 2847 2848 mat->factortype = matin->factortype; 2849 mat->assembled = matin->assembled; 2850 mat->insertmode = NOT_SET_VALUES; 2851 mat->preallocated = matin->preallocated; 2852 2853 a->size = oldmat->size; 2854 a->rank = oldmat->rank; 2855 a->donotstash = oldmat->donotstash; 2856 a->roworiented = oldmat->roworiented; 2857 a->rowindices = NULL; 2858 a->rowvalues = NULL; 2859 a->getrowactive = PETSC_FALSE; 2860 2861 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2862 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2863 2864 if (oldmat->colmap) { 2865 #if defined(PETSC_USE_CTABLE) 2866 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2867 #else 2868 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2869 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2870 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2871 #endif 2872 } else a->colmap = NULL; 2873 if (oldmat->garray) { 2874 PetscInt len; 2875 len = oldmat->B->cmap->n; 2876 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2877 ierr 
= PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2878 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2879 } else a->garray = NULL; 2880 2881 /* It may happen MatDuplicate is called with a non-assembled matrix 2882 In fact, MatDuplicate only requires the matrix to be preallocated 2883 This may happen inside a DMCreateMatrix_Shell */ 2884 if (oldmat->lvec) { 2885 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2887 } 2888 if (oldmat->Mvctx) { 2889 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2891 } 2892 if (oldmat->Mvctx_mpi1) { 2893 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2894 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2895 } 2896 2897 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2898 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2899 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2900 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2901 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2902 *newmat = mat; 2903 PetscFunctionReturn(0); 2904 } 2905 2906 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2907 { 2908 PetscBool isbinary, ishdf5; 2909 PetscErrorCode ierr; 2910 2911 PetscFunctionBegin; 2912 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2913 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2914 /* force binary viewer to load .info file if it has not yet done so */ 2915 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2916 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2917 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2918 if (isbinary) { 2919 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2920 } else if (ishdf5) { 2921 #if defined(PETSC_HAVE_HDF5) 2922 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2923 #else 2924 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2925 #endif 2926 } else { 2927 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2928 } 2929 PetscFunctionReturn(0); 2930 } 2931 2932 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2933 { 2934 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2935 PetscInt *rowidxs,*colidxs; 2936 PetscScalar *matvals; 2937 PetscErrorCode ierr; 2938 2939 PetscFunctionBegin; 2940 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2941 2942 /* read in matrix header */ 2943 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2944 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2945 M = header[1]; N = header[2]; nz = header[3]; 2946 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2947 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2948 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2949 2950 /* set block sizes from the viewer's .info file */ 2951 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2952 /* set global sizes if not set already */ 2953 if (mat->rmap->N < 0) mat->rmap->N = M; 2954 if (mat->cmap->N < 0) mat->cmap->N = N; 2955 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2956 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2957 2958 /* check if the matrix sizes are correct */ 2959 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2960 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2961 2962 /* read in row lengths and build row indices */ 2963 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2964 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2965 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2966 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2967 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2968 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2969 /* read in column indices and matrix values */ 2970 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2971 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2972 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2973 /* store matrix indices and values */ 2974 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2975 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2976 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2981 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2982 { 2983 PetscErrorCode ierr; 2984 IS iscol_local; 2985 PetscBool isstride; 2986 PetscMPIInt lisstride=0,gisstride; 2987 2988 PetscFunctionBegin; 2989 /* check if we are grabbing all columns*/ 2990 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2991 2992 if (isstride) { 2993 PetscInt start,len,mstart,mlen; 2994 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2995 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2996 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2997 if (mstart == start && mlen-mstart == len) lisstride = 1; 2998 } 2999 3000 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3001 if (gisstride) { 3002 PetscInt N; 3003 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3004 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3005 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3006 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3007 } else { 3008 PetscInt cbs; 3009 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3010 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3011 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3012 } 3013 3014 *isseq = iscol_local; 3015 PetscFunctionReturn(0); 3016 } 3017 3018 /* 3019 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3020 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3021 3022 Input Parameters: 3023 mat - matrix 3024 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3025 i.e., mat->rstart <= isrow[i] < mat->rend 3026 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3027 i.e., mat->cstart <= iscol[i] < mat->cend 3028 Output Parameter: 3029 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3030 iscol_o - sequential column index set for retrieving mat->B 3031 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3032 */ 3033 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3034 { 3035 PetscErrorCode ierr; 3036 Vec x,cmap; 3037 const PetscInt *is_idx; 3038 PetscScalar *xarray,*cmaparray; 3039 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3040 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3041 Mat B=a->B; 3042 Vec lvec=a->lvec,lcmap; 3043 PetscInt i,cstart,cend,Bn=B->cmap->N; 3044 MPI_Comm comm; 3045 VecScatter Mvctx=a->Mvctx; 3046 3047 PetscFunctionBegin; 3048 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3049 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3050 3051 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3052 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3053 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3054 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3055 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3056 3057 /* Get start indices */ 3058 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3059 isstart -= ncols; 3060 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3061 3062 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3063 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3064 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3065 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3066 for (i=0; i<ncols; i++) { 3067 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3068 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3069 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3070 } 3071 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3072 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3073 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3074 3075 /* Get iscol_d */ 3076 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3077 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3078 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3079 3080 /* Get isrow_d */ 3081 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3082 rstart = mat->rmap->rstart; 3083 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3084 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3085 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3086 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3087 3088 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3089 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3090 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3091 3092 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3093 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3094 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3095 3096 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3097 3098 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 3101 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3102 /* off-process column indices */ 3103 count = 0; 3104 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3105 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3106 3107 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3108 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3109 for (i=0; i<Bn; i++) { 3110 if (PetscRealPart(xarray[i]) > -1.0) { 3111 idx[count] = i; /* local column index in off-diagonal part B */ 3112 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3113 count++; 3114 } 3115 } 3116 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3117 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3118 3119 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3120 /* cannot ensure iscol_o has same blocksize as iscol! 
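      (iscol_o keeps only the off-process entries of iscol, selected one column at a time, so the block structure of iscol is generally not preserved)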
*/ 3121 3122 ierr = PetscFree(idx);CHKERRQ(ierr); 3123 *garray = cmap1; 3124 3125 ierr = VecDestroy(&x);CHKERRQ(ierr); 3126 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3127 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3128 PetscFunctionReturn(0); 3129 } 3130 3131 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3132 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3133 { 3134 PetscErrorCode ierr; 3135 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3136 Mat M = NULL; 3137 MPI_Comm comm; 3138 IS iscol_d,isrow_d,iscol_o; 3139 Mat Asub = NULL,Bsub = NULL; 3140 PetscInt n; 3141 3142 PetscFunctionBegin; 3143 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3144 3145 if (call == MAT_REUSE_MATRIX) { 3146 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3147 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3148 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3149 3150 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3151 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3152 3153 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3154 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3155 3156 /* Update diagonal and off-diagonal portions of submat */ 3157 asub = (Mat_MPIAIJ*)(*submat)->data; 3158 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3159 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3160 if (n) { 3161 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3162 } 3163 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3165 3166 } else { /* call == MAT_INITIAL_MATRIX) */ 3167 const PetscInt *garray; 3168 PetscInt BsubN; 3169 3170 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3171 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3172 3173 /* Create local submatrices Asub and Bsub */ 3174 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3175 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3176 3177 /* Create submatrix M */ 3178 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3179 3180 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3181 asub = (Mat_MPIAIJ*)M->data; 3182 3183 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3184 n = asub->B->cmap->N; 3185 if (BsubN > n) { 3186 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3187 const PetscInt *idx; 3188 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3189 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3190 3191 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3192 j = 0; 3193 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3194 for (i=0; i<n; i++) { 3195 if (j >= BsubN) break; 3196 while (subgarray[i] > garray[j]) j++; 3197 3198 if (subgarray[i] == garray[j]) { 3199 idx_new[i] = idx[j++]; 3200 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3201 } 3202 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3203 3204 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3205 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3206 3207 } else if (BsubN < n) { 3208 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3209 } 3210 3211 ierr = PetscFree(garray);CHKERRQ(ierr); 3212 *submat = M; 3213 3214 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3215 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3216 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3217 3218 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3219 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3220 3221 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3222 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3223 } 3224 PetscFunctionReturn(0); 3225 } 3226 3227 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3228 { 3229 PetscErrorCode ierr; 3230 IS iscol_local=NULL,isrow_d; 3231 PetscInt csize; 3232 PetscInt n,i,j,start,end; 3233 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3234 MPI_Comm comm; 3235 3236 PetscFunctionBegin; 3237 /* If isrow has same processor distribution as mat, 3238 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3239 if (call == MAT_REUSE_MATRIX) { 3240 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3241 if (isrow_d) { 3242 sameRowDist = PETSC_TRUE; 3243 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3244 } else { 3245 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3246 if (iscol_local) { 3247 sameRowDist = PETSC_TRUE; 3248 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3249 } 3250 } 3251 } else { 3252 /* Check if isrow has same processor distribution as mat */ 3253 sameDist[0] = 
PETSC_FALSE; 3254 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3255 if (!n) { 3256 sameDist[0] = PETSC_TRUE; 3257 } else { 3258 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3259 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3260 if (i >= start && j < end) { 3261 sameDist[0] = PETSC_TRUE; 3262 } 3263 } 3264 3265 /* Check if iscol has same processor distribution as mat */ 3266 sameDist[1] = PETSC_FALSE; 3267 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3268 if (!n) { 3269 sameDist[1] = PETSC_TRUE; 3270 } else { 3271 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3272 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3273 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3274 } 3275 3276 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3277 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3278 sameRowDist = tsameDist[0]; 3279 } 3280 3281 if (sameRowDist) { 3282 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3283 /* isrow and iscol have same processor distribution as mat */ 3284 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3285 PetscFunctionReturn(0); 3286 } else { /* sameRowDist */ 3287 /* isrow has same processor distribution as mat */ 3288 if (call == MAT_INITIAL_MATRIX) { 3289 PetscBool sorted; 3290 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3291 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3292 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3293 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3294 3295 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3296 if (sorted) { 3297 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3298 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3299 PetscFunctionReturn(0); 3300 } 3301 } else { /* call == MAT_REUSE_MATRIX */ 3302 IS iscol_sub; 3303 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3304 if (iscol_sub) { 3305 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3306 PetscFunctionReturn(0); 3307 } 3308 } 3309 } 3310 } 3311 3312 /* General case: iscol -> iscol_local which has global size of iscol */ 3313 if (call == MAT_REUSE_MATRIX) { 3314 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3315 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3316 } else { 3317 if (!iscol_local) { 3318 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3319 } 3320 } 3321 3322 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3323 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3324 3325 if (call == MAT_INITIAL_MATRIX) { 3326 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3327 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3328 } 3329 PetscFunctionReturn(0); 3330 } 3331 3332 /*@C 3333 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3334 and "off-diagonal" part of the matrix in CSR format. 3335 3336 Collective 3337 3338 Input Parameters: 3339 + comm - MPI communicator 3340 . 
A - "diagonal" portion of matrix 3341 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3342 - garray - global index of B columns 3343 3344 Output Parameter: 3345 . mat - the matrix, with input A as its local diagonal matrix 3346 Level: advanced 3347 3348 Notes: 3349 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3350 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3351 3352 .seealso: MatCreateMPIAIJWithSplitArrays() 3353 @*/ 3354 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3355 { 3356 PetscErrorCode ierr; 3357 Mat_MPIAIJ *maij; 3358 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3359 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3360 PetscScalar *oa=b->a; 3361 Mat Bnew; 3362 PetscInt m,n,N; 3363 3364 PetscFunctionBegin; 3365 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3366 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3367 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3368 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3369 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3370 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3371 3372 /* Get global columns of mat */ 3373 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3374 3375 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3376 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3377 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3378 maij = (Mat_MPIAIJ*)(*mat)->data; 3379 3380 (*mat)->preallocated = PETSC_TRUE; 3381 3382 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3383 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3384 3385 /* Set A as diagonal portion of *mat */ 3386 maij->A = A; 3387 3388 nz = oi[m]; 3389 for (i=0; i<nz; i++) { 3390 col = oj[i]; 3391 oj[i] = garray[col]; 3392 } 3393 3394 /* Set Bnew as off-diagonal portion of *mat */ 3395 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3396 bnew = (Mat_SeqAIJ*)Bnew->data; 3397 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3398 maij->B = Bnew; 3399 3400 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3401 3402 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3403 b->free_a = PETSC_FALSE; 3404 b->free_ij = PETSC_FALSE; 3405 ierr = MatDestroy(&B);CHKERRQ(ierr); 3406 3407 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3408 bnew->free_a = PETSC_TRUE; 3409 bnew->free_ij = PETSC_TRUE; 3410 3411 /* condense columns of maij->B */ 3412 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3413 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3414 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3415 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3416 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3417 PetscFunctionReturn(0); 3418 } 3419 3420 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3421 
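/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() (illustrative only, not taken from the PETSc
   test suite; it assumes exactly two MPI ranks, that PETSc has been initialized, and omits error
   checking). Each rank owns a 2x2 diagonal block and one off-process column listed in garray:

     Mat         Adiag,Boff,C;
     PetscMPIInt rank;
     PetscInt    garray[1],row,col;
     PetscScalar v;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MatCreateSeqAIJ(PETSC_COMM_SELF,2,2,1,NULL,&Adiag);               // local "diagonal" block
     row = 0; col = 0; v = 1.0; MatSetValues(Adiag,1,&row,1,&col,&v,INSERT_VALUES);
     row = 1; col = 1; v = 1.0; MatSetValues(Adiag,1,&row,1,&col,&v,INSERT_VALUES);
     MatAssemblyBegin(Adiag,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(Adiag,MAT_FINAL_ASSEMBLY);

     garray[0] = rank ? 1 : 2;                                         // global column of the single off-diagonal column
     MatCreateSeqAIJ(PETSC_COMM_SELF,2,1,1,NULL,&Boff);                // local "off-diagonal" block with compact columns
     row = 0; col = 0; v = 5.0; MatSetValues(Boff,1,&row,1,&col,&v,INSERT_VALUES);
     MatAssemblyBegin(Boff,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(Boff,MAT_FINAL_ASSEMBLY);

     MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boff,garray,&C); // Adiag is absorbed into C, Boff is destroyed

   The resulting C is a 4x4 MATMPIAIJ matrix distributed over the two ranks.
*/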
3422 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3423 { 3424 PetscErrorCode ierr; 3425 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3426 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3427 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3428 Mat M,Msub,B=a->B; 3429 MatScalar *aa; 3430 Mat_SeqAIJ *aij; 3431 PetscInt *garray = a->garray,*colsub,Ncols; 3432 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3433 IS iscol_sub,iscmap; 3434 const PetscInt *is_idx,*cmap; 3435 PetscBool allcolumns=PETSC_FALSE; 3436 MPI_Comm comm; 3437 3438 PetscFunctionBegin; 3439 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3440 3441 if (call == MAT_REUSE_MATRIX) { 3442 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3443 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3444 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3445 3446 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3447 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3448 3449 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3450 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3451 3452 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3453 3454 } else { /* call == MAT_INITIAL_MATRIX) */ 3455 PetscBool flg; 3456 3457 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3458 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3459 3460 /* (1) iscol -> nonscalable iscol_local */ 3461 /* Check for special case: each processor gets entire matrix columns */ 3462 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3463 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3464 if (allcolumns) { 3465 iscol_sub = iscol_local; 3466 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3467 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3468 3469 } else { 3470 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3471 PetscInt *idx,*cmap1,k; 3472 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3473 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3474 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3475 count = 0; 3476 k = 0; 3477 for (i=0; i<Ncols; i++) { 3478 j = is_idx[i]; 3479 if (j >= cstart && j < cend) { 3480 /* diagonal part of mat */ 3481 idx[count] = j; 3482 cmap1[count++] = i; /* column index in submat */ 3483 } else if (Bn) { 3484 /* off-diagonal part of mat */ 3485 if (j == garray[k]) { 3486 idx[count] = j; 3487 cmap1[count++] = i; /* column index in submat */ 3488 } else if (j > garray[k]) { 3489 while (j > garray[k] && k < Bn-1) k++; 3490 if (j == garray[k]) { 3491 idx[count] = j; 3492 cmap1[count++] = i; /* column index in submat */ 3493 } 3494 } 3495 } 3496 } 3497 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3498 3499 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3500 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3501 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3502 3503 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3504 } 3505 3506 /* (3) Create sequential Msub */ 3507 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3508 } 3509 3510 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3511 aij = (Mat_SeqAIJ*)(Msub)->data; 3512 ii = aij->i; 3513 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3514 3515 /* 3516 m - number of local rows 3517 Ncols - number of columns (same on all processors) 3518 rstart - first row in new global matrix generated 3519 */ 3520 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3521 3522 if (call == MAT_INITIAL_MATRIX) { 3523 /* (4) Create parallel newmat */ 3524 PetscMPIInt rank,size; 3525 PetscInt csize; 3526 3527 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3528 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3529 3530 /* 3531 Determine the number of non-zeros in the diagonal and off-diagonal 3532 portions of the matrix in order to do correct preallocation 3533 */ 3534 3535 /* first get start and end of "diagonal" columns */ 3536 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3537 if (csize == PETSC_DECIDE) { 3538 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3539 if (mglobal == Ncols) { /* square matrix */ 3540 nlocal = m; 3541 } else { 3542 nlocal = Ncols/size + ((Ncols % size) > rank); 3543 } 3544 } else { 3545 nlocal = csize; 3546 } 3547 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3548 rstart = rend - nlocal; 3549 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3550 3551 /* next, compute all the lengths */ 3552 jj = aij->j; 3553 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3554 olens = dlens + m; 3555 for (i=0; i<m; i++) { 3556 jend = ii[i+1] - ii[i]; 3557 olen = 0; 3558 dlen = 0; 3559 for (j=0; j<jend; j++) { 3560 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3561 else dlen++; 3562 jj++; 3563 } 3564 olens[i] = olen; 3565 dlens[i] = dlen; 3566 } 3567 3568 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3569 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3570 3571 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3572 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3573 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3574 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3575 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3576 ierr = PetscFree(dlens);CHKERRQ(ierr); 3577 3578 } else { /* call == MAT_REUSE_MATRIX */ 3579 M = *newmat; 3580 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3581 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3582 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3583 /* 3584 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3585 rather than the slower MatSetValues(). 3586 */ 3587 M->was_assembled = PETSC_TRUE; 3588 M->assembled = PETSC_FALSE; 3589 } 3590 3591 /* (5) Set values of Msub to *newmat */ 3592 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3593 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3594 3595 jj = aij->j; 3596 aa = aij->a; 3597 for (i=0; i<m; i++) { 3598 row = rstart + i; 3599 nz = ii[i+1] - ii[i]; 3600 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3601 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3602 jj += nz; aa += nz; 3603 } 3604 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3605 3606 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3607 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3608 3609 ierr = PetscFree(colsub);CHKERRQ(ierr); 3610 3611 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3612 if (call == MAT_INITIAL_MATRIX) { 3613 *newmat = M; 3614 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3615 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3616 3617 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3618 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3619 3620 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3621 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3622 3623 if (iscol_local) { 3624 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3625 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3626 } 3627 } 3628 PetscFunctionReturn(0); 3629 } 3630 3631 /* 3632 Not great since it makes two copies of the submatrix, first an SeqAIJ 3633 in local and then by concatenating the local matrices the end result. 3634 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3635 3636 Note: This requires a sequential iscol with all indices. 
3637 */ 3638 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3639 { 3640 PetscErrorCode ierr; 3641 PetscMPIInt rank,size; 3642 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3643 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3644 Mat M,Mreuse; 3645 MatScalar *aa,*vwork; 3646 MPI_Comm comm; 3647 Mat_SeqAIJ *aij; 3648 PetscBool colflag,allcolumns=PETSC_FALSE; 3649 3650 PetscFunctionBegin; 3651 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3652 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3654 3655 /* Check for special case: each processor gets entire matrix columns */ 3656 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3657 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3658 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3659 3660 if (call == MAT_REUSE_MATRIX) { 3661 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3662 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3663 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3664 } else { 3665 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3666 } 3667 3668 /* 3669 m - number of local rows 3670 n - number of columns (same on all processors) 3671 rstart - first row in new global matrix generated 3672 */ 3673 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3674 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3675 if (call == MAT_INITIAL_MATRIX) { 3676 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3677 ii = aij->i; 3678 jj = aij->j; 3679 3680 /* 3681 Determine the number of non-zeros in the diagonal and off-diagonal 3682 portions of the matrix in order to do correct preallocation 3683 */ 3684 3685 /* first get start and end of "diagonal" columns */ 3686 if (csize == PETSC_DECIDE) { 3687 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3688 if (mglobal == n) { /* square matrix */ 3689 nlocal = m; 3690 } else { 3691 nlocal = n/size + ((n % size) > rank); 3692 } 3693 } else { 3694 nlocal = csize; 3695 } 3696 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3697 rstart = rend - nlocal; 3698 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3699 3700 /* next, compute all the lengths */ 3701 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3702 olens = dlens + m; 3703 for (i=0; i<m; i++) { 3704 jend = ii[i+1] - ii[i]; 3705 olen = 0; 3706 dlen = 0; 3707 for (j=0; j<jend; j++) { 3708 if (*jj < rstart || *jj >= rend) olen++; 3709 else dlen++; 3710 jj++; 3711 } 3712 olens[i] = olen; 3713 dlens[i] = dlen; 3714 } 3715 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3716 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3717 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3718 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3719 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3720 ierr = PetscFree(dlens);CHKERRQ(ierr); 3721 } else { 3722 PetscInt ml,nl; 3723 3724 M = *newmat; 3725 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3726 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3727 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3728 /* 3729 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3730 rather than the slower MatSetValues(). 3731 */ 3732 M->was_assembled = PETSC_TRUE; 3733 M->assembled = PETSC_FALSE; 3734 } 3735 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3736 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3737 ii = aij->i; 3738 jj = aij->j; 3739 aa = aij->a; 3740 for (i=0; i<m; i++) { 3741 row = rstart + i; 3742 nz = ii[i+1] - ii[i]; 3743 cwork = jj; jj += nz; 3744 vwork = aa; aa += nz; 3745 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3746 } 3747 3748 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3749 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3750 *newmat = M; 3751 3752 /* save submatrix used in processor for next request */ 3753 if (call == MAT_INITIAL_MATRIX) { 3754 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3755 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3756 } 3757 PetscFunctionReturn(0); 3758 } 3759 3760 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3761 { 3762 PetscInt m,cstart, cend,j,nnz,i,d; 3763 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3764 const PetscInt *JJ; 3765 PetscErrorCode ierr; 3766 PetscBool nooffprocentries; 3767 3768 PetscFunctionBegin; 3769 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3770 3771 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3772 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3773 m = B->rmap->n; 3774 cstart = B->cmap->rstart; 3775 cend = B->cmap->rend; 3776 rstart = B->rmap->rstart; 3777 3778 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3779 3780 if (PetscDefined(USE_DEBUG)) { 3781 for (i=0; i<m; i++) { 3782 nnz = Ii[i+1]- Ii[i]; 3783 JJ = J + Ii[i]; 3784 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3785 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3786 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3787 } 3788 } 3789 3790 for (i=0; i<m; i++) { 3791 nnz = Ii[i+1]- Ii[i]; 3792 JJ = J + Ii[i]; 3793 nnz_max = PetscMax(nnz_max,nnz); 3794 d = 0; 3795 for (j=0; j<nnz; j++) { 3796 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3797 } 3798 d_nnz[i] = d; 3799 o_nnz[i] = nnz - d; 3800 } 3801 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3802 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3803 3804 for (i=0; i<m; i++) { 3805 ii = i + rstart; 3806 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3807 } 3808 nooffprocentries = B->nooffprocentries; 3809 B->nooffprocentries = PETSC_TRUE; 3810 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3811 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3812 B->nooffprocentries = nooffprocentries; 3813 3814 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3815 PetscFunctionReturn(0); 3816 } 3817 3818 /*@ 3819 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3820 (the default parallel PETSc format). 
3821 3822 Collective 3823 3824 Input Parameters: 3825 + B - the matrix 3826 . i - the indices into j for the start of each local row (starts with zero) 3827 . j - the column indices for each local row (starts with zero) 3828 - v - optional values in the matrix 3829 3830 Level: developer 3831 3832 Notes: 3833 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3834 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3835 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3836 3837 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3838 3839 The format which is used for the sparse matrix input, is equivalent to a 3840 row-major ordering.. i.e for the following matrix, the input data expected is 3841 as shown 3842 3843 $ 1 0 0 3844 $ 2 0 3 P0 3845 $ ------- 3846 $ 4 5 6 P1 3847 $ 3848 $ Process0 [P0]: rows_owned=[0,1] 3849 $ i = {0,1,3} [size = nrow+1 = 2+1] 3850 $ j = {0,0,2} [size = 3] 3851 $ v = {1,2,3} [size = 3] 3852 $ 3853 $ Process1 [P1]: rows_owned=[2] 3854 $ i = {0,3} [size = nrow+1 = 1+1] 3855 $ j = {0,1,2} [size = 3] 3856 $ v = {4,5,6} [size = 3] 3857 3858 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3859 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3860 @*/ 3861 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3862 { 3863 PetscErrorCode ierr; 3864 3865 PetscFunctionBegin; 3866 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3867 PetscFunctionReturn(0); 3868 } 3869 3870 /*@C 3871 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3872 (the default parallel PETSc format). For good matrix assembly performance 3873 the user should preallocate the matrix storage by setting the parameters 3874 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3875 performance can be increased by more than a factor of 50. 3876 3877 Collective 3878 3879 Input Parameters: 3880 + B - the matrix 3881 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3882 (same value is used for all local rows) 3883 . d_nnz - array containing the number of nonzeros in the various rows of the 3884 DIAGONAL portion of the local submatrix (possibly different for each row) 3885 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3886 The size of this array is equal to the number of local rows, i.e 'm'. 3887 For matrices that will be factored, you must leave room for (and set) 3888 the diagonal entry even if it is zero. 3889 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3890 submatrix (same value is used for all local rows). 3891 - o_nnz - array containing the number of nonzeros in the various rows of the 3892 OFF-DIAGONAL portion of the local submatrix (possibly different for 3893 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3894 structure. The size of this array is equal to the number 3895 of local rows, i.e 'm'. 
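   A typical call sequence is sketched below; the preallocation numbers are purely illustrative
   and should be replaced by estimates appropriate for the actual matrix:
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetType(A,MATMPIAIJ);
      MatSetSizes(A,m,n,M,N);
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
   Here at most 5 nonzeros per row are expected in the diagonal block and at most 2 in the
   off-diagonal block of each local row.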
3896 3897 If the *_nnz parameter is given then the *_nz parameter is ignored 3898 3899 The AIJ format (also called the Yale sparse matrix format or 3900 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3901 storage. The stored row and column indices begin with zero. 3902 See Users-Manual: ch_mat for details. 3903 3904 The parallel matrix is partitioned such that the first m0 rows belong to 3905 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3906 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3907 3908 The DIAGONAL portion of the local submatrix of a processor can be defined 3909 as the submatrix which is obtained by extracting the part corresponding to 3910 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3911 first row that belongs to the processor, r2 is the last row belonging to 3912 this processor, and c1-c2 is the range of indices of the local part of a 3913 vector suitable for applying the matrix to. This is an mxn matrix. In the 3914 common case of a square matrix, the row and column ranges are the same and 3915 the DIAGONAL part is also square. The remaining portion of the local 3916 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3917 3918 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3919 3920 You can call MatGetInfo() to get information on how effective the preallocation was; 3921 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3922 You can also run with the option -info and look for messages with the string 3923 malloc in them to see if additional memory allocation was needed. 3924 3925 Example usage: 3926 3927 Consider the following 8x8 matrix with 34 non-zero values, that is 3928 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3929 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3930 as follows: 3931 3932 .vb 3933 1 2 0 | 0 3 0 | 0 4 3934 Proc0 0 5 6 | 7 0 0 | 8 0 3935 9 0 10 | 11 0 0 | 12 0 3936 ------------------------------------- 3937 13 0 14 | 15 16 17 | 0 0 3938 Proc1 0 18 0 | 19 20 21 | 0 0 3939 0 0 0 | 22 23 0 | 24 0 3940 ------------------------------------- 3941 Proc2 25 26 27 | 0 0 28 | 29 0 3942 30 0 0 | 31 32 33 | 0 34 3943 .ve 3944 3945 This can be represented as a collection of submatrices as: 3946 3947 .vb 3948 A B C 3949 D E F 3950 G H I 3951 .ve 3952 3953 Where the submatrices A,B,C are owned by proc0, D,E,F are 3954 owned by proc1, G,H,I are owned by proc2. 3955 3956 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3957 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3958 The 'M','N' parameters are 8,8, and have the same values on all procs. 3959 3960 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3961 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3962 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3963 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3964 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3965 matrix, and [DF] as another SeqAIJ matrix. 3966 3967 When d_nz, o_nz parameters are specified, d_nz storage elements are 3968 allocated for every row of the local diagonal submatrix, and o_nz 3969 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3970 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3971 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
3972 In this case, the values of d_nz,o_nz are: 3973 .vb 3974 proc0 : dnz = 2, o_nz = 2 3975 proc1 : dnz = 3, o_nz = 2 3976 proc2 : dnz = 1, o_nz = 4 3977 .ve 3978 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3979 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3980 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3981 34 values. 3982 3983 When d_nnz, o_nnz parameters are specified, the storage is specified 3984 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3985 In the above case the values for d_nnz,o_nnz are: 3986 .vb 3987 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3988 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3989 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3990 .ve 3991 Here the space allocated is the sum of all the above values, i.e., 34, and 3992 hence pre-allocation is perfect. 3993 3994 Level: intermediate 3995 3996 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3997 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3998 @*/ 3999 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4000 { 4001 PetscErrorCode ierr; 4002 4003 PetscFunctionBegin; 4004 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4005 PetscValidType(B,1); 4006 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4007 PetscFunctionReturn(0); 4008 } 4009 4010 /*@ 4011 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4012 CSR format. 4013 4014 Collective 4015 4016 Input Parameters: 4017 + comm - MPI communicator 4018 . m - number of local rows (Cannot be PETSC_DECIDE) 4019 . n - This value should be the same as the local size used in creating the 4020 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4021 calculated if N is given) For square matrices n is almost always m. 4022 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4023 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4024 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4025 . j - column indices 4026 - a - matrix values 4027 4028 Output Parameter: 4029 . mat - the matrix 4030 4031 Level: intermediate 4032 4033 Notes: 4034 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4035 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4036 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4037 4038 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4039 4040 The format used for the sparse matrix input is equivalent to a 4041 row-major ordering, 
i.e., for the following matrix, the input data expected is 4042 as shown below. 4043 4044 Once you have created the matrix, you can update it with new numerical values using MatUpdateMPIAIJWithArrays(). 4045 4046 $ 1 0 0 4047 $ 2 0 3 P0 4048 $ ------- 4049 $ 4 5 6 P1 4050 $ 4051 $ Process0 [P0]: rows_owned=[0,1] 4052 $ i = {0,1,3} [size = nrow+1 = 2+1] 4053 $ j = {0,0,2} [size = 3] 4054 $ v = {1,2,3} [size = 3] 4055 $ 4056 $ Process1 [P1]: rows_owned=[2] 4057 $ i = {0,3} [size = nrow+1 = 1+1] 4058 $ j = {0,1,2} [size = 3] 4059 $ v = {4,5,6} [size = 3] 4060 4061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4062 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4063 @*/ 4064 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4065 { 4066 PetscErrorCode ierr; 4067 4068 PetscFunctionBegin; 4069 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4070 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4071 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4072 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4073 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4074 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4075 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4076 PetscFunctionReturn(0); 4077 } 4078 4079 /*@ 4080 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4081 CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created 4082 4083 Collective 4084 4085 Input Parameters: 4086 + mat - the matrix 4087 . m - number of local rows (Cannot be PETSC_DECIDE) 4088 . n - This value should be the same as the local size used in creating the 4089 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4090 calculated if N is given) For square matrices n is almost always m. 4091 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4092 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4093 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4094 . 
J - column indices 4095 - v - matrix values 4096 4097 Level: intermediate 4098 4099 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4100 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4101 @*/ 4102 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4103 { 4104 PetscErrorCode ierr; 4105 PetscInt cstart,nnz,i,j; 4106 PetscInt *ld; 4107 PetscBool nooffprocentries; 4108 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4109 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4110 PetscScalar *ad = Ad->a, *ao = Ao->a; 4111 const PetscInt *Adi = Ad->i; 4112 PetscInt ldi,Iii,md; 4113 4114 PetscFunctionBegin; 4115 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4116 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4117 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4118 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4119 4120 cstart = mat->cmap->rstart; 4121 if (!Aij->ld) { 4122 /* count number of entries below block diagonal */ 4123 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4124 Aij->ld = ld; 4125 for (i=0; i<m; i++) { 4126 nnz = Ii[i+1]- Ii[i]; 4127 j = 0; 4128 while (J[j] < cstart && j < nnz) {j++;} 4129 J += nnz; 4130 ld[i] = j; 4131 } 4132 } else { 4133 ld = Aij->ld; 4134 } 4135 4136 for (i=0; i<m; i++) { 4137 nnz = Ii[i+1]- Ii[i]; 4138 Iii = Ii[i]; 4139 ldi = ld[i]; 4140 md = Adi[i+1]-Adi[i]; 4141 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4142 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4143 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4144 ad += md; 4145 ao += nnz - md; 4146 } 4147 nooffprocentries = mat->nooffprocentries; 4148 mat->nooffprocentries = PETSC_TRUE; 4149 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4150 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4151 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4152 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4153 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4154 mat->nooffprocentries = nooffprocentries; 4155 PetscFunctionReturn(0); 4156 } 4157 4158 /*@C 4159 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4160 (the default parallel PETSc format). For good matrix assembly performance 4161 the user should preallocate the matrix storage by setting the parameters 4162 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4163 performance can be increased by more than a factor of 50. 4164 4165 Collective 4166 4167 Input Parameters: 4168 + comm - MPI communicator 4169 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4170 This value should be the same as the local size used in creating the 4171 y vector for the matrix-vector product y = Ax. 4172 . n - This value should be the same as the local size used in creating the 4173 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4174 calculated if N is given) For square matrices n is almost always m. 4175 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4176 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4177 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4178 (same value is used for all local rows) 4179 . d_nnz - array containing the number of nonzeros in the various rows of the 4180 DIAGONAL portion of the local submatrix (possibly different for each row) 4181 or NULL, if d_nz is used to specify the nonzero structure. 4182 The size of this array is equal to the number of local rows, i.e 'm'. 4183 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4184 submatrix (same value is used for all local rows). 4185 - o_nnz - array containing the number of nonzeros in the various rows of the 4186 OFF-DIAGONAL portion of the local submatrix (possibly different for 4187 each row) or NULL, if o_nz is used to specify the nonzero 4188 structure. The size of this array is equal to the number 4189 of local rows, i.e 'm'. 4190 4191 Output Parameter: 4192 . A - the matrix 4193 4194 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4195 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4196 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4197 4198 Notes: 4199 If the *_nnz parameter is given then the *_nz parameter is ignored 4200 4201 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4202 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4203 storage requirements for this matrix. 4204 4205 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4206 processor than it must be used on all processors that share the object for 4207 that argument. 4208 4209 The user MUST specify either the local or global matrix dimensions 4210 (possibly both). 4211 4212 The parallel matrix is partitioned across processors such that the 4213 first m0 rows belong to process 0, the next m1 rows belong to 4214 process 1, the next m2 rows belong to process 2 etc.. where 4215 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4216 values corresponding to [m x N] submatrix. 4217 4218 The columns are logically partitioned with the n0 columns belonging 4219 to 0th partition, the next n1 columns belonging to the next 4220 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4221 4222 The DIAGONAL portion of the local submatrix on any given processor 4223 is the submatrix corresponding to the rows and columns m,n 4224 corresponding to the given processor. i.e diagonal matrix on 4225 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4226 etc. The remaining portion of the local submatrix [m x (N-n)] 4227 constitute the OFF-DIAGONAL portion. The example below better 4228 illustrates this concept. 4229 4230 For a square global matrix we define each processor's diagonal portion 4231 to be its local rows and the corresponding columns (a square submatrix); 4232 each processor's off-diagonal portion encompasses the remainder of the 4233 local matrix (a rectangular submatrix). 4234 4235 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4236 4237 When calling this routine with a single process communicator, a matrix of 4238 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4239 type of communicator, use the construction mechanism 4240 .vb 4241 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4242 .ve 4243 4244 $ MatCreate(...,&A); 4245 $ MatSetType(A,MATMPIAIJ); 4246 $ MatSetSizes(A, m,n,M,N); 4247 $ MatMPIAIJSetPreallocation(A,...); 4248 4249 By default, this format uses inodes (identical nodes) when possible. 4250 We search for consecutive rows with the same nonzero structure, thereby 4251 reusing matrix information to achieve increased efficiency. 4252 4253 Options Database Keys: 4254 + -mat_no_inode - Do not use inodes 4255 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4256 4257 4258 4259 Example usage: 4260 4261 Consider the following 8x8 matrix with 34 non-zero values, that is 4262 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4263 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4264 as follows 4265 4266 .vb 4267 1 2 0 | 0 3 0 | 0 4 4268 Proc0 0 5 6 | 7 0 0 | 8 0 4269 9 0 10 | 11 0 0 | 12 0 4270 ------------------------------------- 4271 13 0 14 | 15 16 17 | 0 0 4272 Proc1 0 18 0 | 19 20 21 | 0 0 4273 0 0 0 | 22 23 0 | 24 0 4274 ------------------------------------- 4275 Proc2 25 26 27 | 0 0 28 | 29 0 4276 30 0 0 | 31 32 33 | 0 34 4277 .ve 4278 4279 This can be represented as a collection of submatrices as 4280 4281 .vb 4282 A B C 4283 D E F 4284 G H I 4285 .ve 4286 4287 Where the submatrices A,B,C are owned by proc0, D,E,F are 4288 owned by proc1, G,H,I are owned by proc2. 4289 4290 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4291 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4292 The 'M','N' parameters are 8,8, and have the same values on all procs. 4293 4294 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4295 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4296 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4297 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4298 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4299 matrix, and [DF] as another SeqAIJ matrix. 4300 4301 When d_nz, o_nz parameters are specified, d_nz storage elements are 4302 allocated for every row of the local diagonal submatrix, and o_nz 4303 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4304 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4305 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4306 In this case, the values of d_nz,o_nz are 4307 .vb 4308 proc0 : dnz = 2, o_nz = 2 4309 proc1 : dnz = 3, o_nz = 2 4310 proc2 : dnz = 1, o_nz = 4 4311 .ve 4312 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4313 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4314 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4315 34 values. 4316 4317 When d_nnz, o_nnz parameters are specified, the storage is specified 4318 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4319 In the above case the values for d_nnz,o_nnz are 4320 .vb 4321 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4322 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4323 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4324 .ve 4325 Here the space allocated is the sum of all the above values, i.e., 34, and 4326 hence pre-allocation is perfect. 
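   As a rough sketch (not tied to the 8x8 example above; error checking is omitted and the
   entry values are illustrative only), a matrix can be created and its locally owned diagonal
   entries set as follows:
.vb
      Mat         A;
      PetscInt    i,rstart,rend;
      PetscScalar v = 1.0;

      MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,NULL,o_nz,NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {
        MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve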
4327 4328 Level: intermediate 4329 4330 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4331 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4332 @*/ 4333 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4334 { 4335 PetscErrorCode ierr; 4336 PetscMPIInt size; 4337 4338 PetscFunctionBegin; 4339 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4340 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4341 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4342 if (size > 1) { 4343 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4344 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4345 } else { 4346 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4347 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4348 } 4349 PetscFunctionReturn(0); 4350 } 4351 4352 /*@C 4353 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4354 4355 Not collective 4356 4357 Input Parameter: 4358 . A - The MPIAIJ matrix 4359 4360 Output Parameters: 4361 + Ad - The local diagonal block as a SeqAIJ matrix 4362 . Ao - The local off-diagonal block as a SeqAIJ matrix 4363 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4364 4365 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4366 in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4367 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4368 local column numbers to global column numbers in the original matrix.
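   A brief illustrative sketch of accessing the pieces; the returned Ad, Ao and colmap are borrowed
   references owned by A and must not be modified or destroyed. Local column j of Ao corresponds to
   global column colmap[j] of A:
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
.ve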
4369 4370 Level: intermediate 4371 4372 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAJ, MATSEQAIJ 4373 @*/ 4374 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4375 { 4376 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4377 PetscBool flg; 4378 PetscErrorCode ierr; 4379 4380 PetscFunctionBegin; 4381 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4382 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4383 if (Ad) *Ad = a->A; 4384 if (Ao) *Ao = a->B; 4385 if (colmap) *colmap = a->garray; 4386 PetscFunctionReturn(0); 4387 } 4388 4389 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4390 { 4391 PetscErrorCode ierr; 4392 PetscInt m,N,i,rstart,nnz,Ii; 4393 PetscInt *indx; 4394 PetscScalar *values; 4395 4396 PetscFunctionBegin; 4397 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4398 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4399 PetscInt *dnz,*onz,sum,bs,cbs; 4400 4401 if (n == PETSC_DECIDE) { 4402 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4403 } 4404 /* Check sum(n) = N */ 4405 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4406 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4407 4408 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4409 rstart -= m; 4410 4411 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4412 for (i=0; i<m; i++) { 4413 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4414 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4415 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4416 } 4417 4418 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4419 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4420 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4421 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4422 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4423 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4424 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4425 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4426 } 4427 4428 /* numeric phase */ 4429 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4430 for (i=0; i<m; i++) { 4431 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4432 Ii = i + rstart; 4433 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4434 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4435 } 4436 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4437 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4438 PetscFunctionReturn(0); 4439 } 4440 4441 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4442 { 4443 PetscErrorCode ierr; 4444 PetscMPIInt rank; 4445 PetscInt m,N,i,rstart,nnz; 4446 size_t len; 4447 const PetscInt *indx; 4448 PetscViewer out; 4449 char *name; 4450 Mat B; 4451 const PetscScalar *values; 4452 4453 PetscFunctionBegin; 4454 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4455 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4456 /* Should this be the type of the diagonal block of A? 
*/ 4457 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4458 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4459 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4460 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4461 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4462 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4463 for (i=0; i<m; i++) { 4464 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4465 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4466 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4467 } 4468 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4469 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4470 4471 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4472 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4473 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4474 sprintf(name,"%s.%d",outfile,rank); 4475 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4476 ierr = PetscFree(name);CHKERRQ(ierr); 4477 ierr = MatView(B,out);CHKERRQ(ierr); 4478 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4479 ierr = MatDestroy(&B);CHKERRQ(ierr); 4480 PetscFunctionReturn(0); 4481 } 4482 4483 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4484 { 4485 PetscErrorCode ierr; 4486 Mat_Merge_SeqsToMPI *merge; 4487 PetscContainer container; 4488 4489 PetscFunctionBegin; 4490 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4491 if (container) { 4492 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4505 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4506 ierr = PetscFree(merge);CHKERRQ(ierr); 4507 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4508 } 4509 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #include <../src/mat/utils/freespace.h> 4514 #include <petscbt.h> 4515 4516 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4517 { 4518 PetscErrorCode ierr; 4519 MPI_Comm comm; 4520 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4521 PetscMPIInt size,rank,taga,*len_s; 4522 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4523 PetscInt proc,m; 4524 PetscInt **buf_ri,**buf_rj; 4525 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4526 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4527 MPI_Request *s_waits,*r_waits; 4528 MPI_Status *status; 4529 MatScalar *aa=a->a; 4530 MatScalar **abuf_r,*ba_i; 4531 Mat_Merge_SeqsToMPI *merge; 4532 PetscContainer container; 4533 4534 PetscFunctionBegin; 4535 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4536 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4537 4538 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4539 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4540 4541 ierr 
= PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4542 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4543 4544 bi = merge->bi; 4545 bj = merge->bj; 4546 buf_ri = merge->buf_ri; 4547 buf_rj = merge->buf_rj; 4548 4549 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4550 owners = merge->rowmap->range; 4551 len_s = merge->len_s; 4552 4553 /* send and recv matrix values */ 4554 /*-----------------------------*/ 4555 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4556 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4557 4558 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4559 for (proc=0,k=0; proc<size; proc++) { 4560 if (!len_s[proc]) continue; 4561 i = owners[proc]; 4562 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4563 k++; 4564 } 4565 4566 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4567 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4568 ierr = PetscFree(status);CHKERRQ(ierr); 4569 4570 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4571 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4572 4573 /* insert mat values of mpimat */ 4574 /*----------------------------*/ 4575 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4576 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4577 4578 for (k=0; k<merge->nrecv; k++) { 4579 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4580 nrows = *(buf_ri_k[k]); 4581 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4582 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4583 } 4584 4585 /* set values of ba */ 4586 m = merge->rowmap->n; 4587 for (i=0; i<m; i++) { 4588 arow = owners[rank] + i; 4589 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4590 bnzi = bi[i+1] - bi[i]; 4591 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4592 4593 /* add local non-zero vals of this proc's seqmat into ba */ 4594 anzi = ai[arow+1] - ai[arow]; 4595 aj = a->j + ai[arow]; 4596 aa = a->a + ai[arow]; 4597 nextaj = 0; 4598 for (j=0; nextaj<anzi; j++) { 4599 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4600 ba_i[j] += aa[nextaj++]; 4601 } 4602 } 4603 4604 /* add received vals into ba */ 4605 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4606 /* i-th row */ 4607 if (i == *nextrow[k]) { 4608 anzi = *(nextai[k]+1) - *nextai[k]; 4609 aj = buf_rj[k] + *(nextai[k]); 4610 aa = abuf_r[k] + *(nextai[k]); 4611 nextaj = 0; 4612 for (j=0; nextaj<anzi; j++) { 4613 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4614 ba_i[j] += aa[nextaj++]; 4615 } 4616 } 4617 nextrow[k]++; nextai[k]++; 4618 } 4619 } 4620 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4621 } 4622 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4623 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4624 4625 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4626 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4627 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4628 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4629 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4630 PetscFunctionReturn(0); 4631 } 4632 4633 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt 
m,PetscInt n,Mat *mpimat) 4634 { 4635 PetscErrorCode ierr; 4636 Mat B_mpi; 4637 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4638 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4639 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4640 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4641 PetscInt len,proc,*dnz,*onz,bs,cbs; 4642 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4643 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4644 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4645 MPI_Status *status; 4646 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4647 PetscBT lnkbt; 4648 Mat_Merge_SeqsToMPI *merge; 4649 PetscContainer container; 4650 4651 PetscFunctionBegin; 4652 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4653 4654 /* make sure it is a PETSc comm */ 4655 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4658 4659 ierr = PetscNew(&merge);CHKERRQ(ierr); 4660 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4661 4662 /* determine row ownership */ 4663 /*---------------------------------------------------------*/ 4664 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4667 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4669 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4670 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4671 4672 m = merge->rowmap->n; 4673 owners = merge->rowmap->range; 4674 4675 /* determine the number of messages to send, their lengths */ 4676 /*---------------------------------------------------------*/ 4677 len_s = merge->len_s; 4678 4679 len = 0; /* length of buf_si[] */ 4680 merge->nsend = 0; 4681 for (proc=0; proc<size; proc++) { 4682 len_si[proc] = 0; 4683 if (proc == rank) { 4684 len_s[proc] = 0; 4685 } else { 4686 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4687 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4688 } 4689 if (len_s[proc]) { 4690 merge->nsend++; 4691 nrows = 0; 4692 for (i=owners[proc]; i<owners[proc+1]; i++) { 4693 if (ai[i+1] > ai[i]) nrows++; 4694 } 4695 len_si[proc] = 2*(nrows+1); 4696 len += len_si[proc]; 4697 } 4698 } 4699 4700 /* determine the number and length of messages to receive for ij-structure */ 4701 /*-------------------------------------------------------------------------*/ 4702 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4703 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4704 4705 /* post the Irecv of j-structure */ 4706 /*-------------------------------*/ 4707 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4708 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4709 4710 /* post the Isend of j-structure */ 4711 /*--------------------------------*/ 4712 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4713 4714 for (proc=0, k=0; proc<size; proc++) { 4715 if (!len_s[proc]) continue; 4716 i = owners[proc]; 4717 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4718 k++; 4719 } 4720 4721 /* receives and sends 
of j-structure are complete */ 4722 /*------------------------------------------------*/ 4723 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4724 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4725 4726 /* send and recv i-structure */ 4727 /*---------------------------*/ 4728 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4729 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4730 4731 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4732 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4733 for (proc=0,k=0; proc<size; proc++) { 4734 if (!len_s[proc]) continue; 4735 /* form outgoing message for i-structure: 4736 buf_si[0]: nrows to be sent 4737 [1:nrows]: row index (global) 4738 [nrows+1:2*nrows+1]: i-structure index 4739 */ 4740 /*-------------------------------------------*/ 4741 nrows = len_si[proc]/2 - 1; 4742 buf_si_i = buf_si + nrows+1; 4743 buf_si[0] = nrows; 4744 buf_si_i[0] = 0; 4745 nrows = 0; 4746 for (i=owners[proc]; i<owners[proc+1]; i++) { 4747 anzi = ai[i+1] - ai[i]; 4748 if (anzi) { 4749 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4750 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4751 nrows++; 4752 } 4753 } 4754 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4755 k++; 4756 buf_si += len_si[proc]; 4757 } 4758 4759 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4760 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4761 4762 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4763 for (i=0; i<merge->nrecv; i++) { 4764 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4765 } 4766 4767 ierr = PetscFree(len_si);CHKERRQ(ierr); 4768 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4769 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4770 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4771 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4772 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4773 ierr = PetscFree(status);CHKERRQ(ierr); 4774 4775 /* compute a local seq matrix in each processor */ 4776 /*----------------------------------------------*/ 4777 /* allocate bi array and free space for accumulating nonzero column info */ 4778 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4779 bi[0] = 0; 4780 4781 /* create and initialize a linked list */ 4782 nlnk = N+1; 4783 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4784 4785 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4786 len = ai[owners[rank+1]] - ai[owners[rank]]; 4787 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4788 4789 current_space = free_space; 4790 4791 /* determine symbolic info for each local row */ 4792 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4793 4794 for (k=0; k<merge->nrecv; k++) { 4795 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4796 nrows = *buf_ri_k[k]; 4797 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4798 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4799 } 4800 4801 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4802 len = 0; 4803 for (i=0; i<m; i++) { 4804 bnzi = 0; 4805 /* add local 
non-zero cols of this proc's seqmat into lnk */ 4806 arow = owners[rank] + i; 4807 anzi = ai[arow+1] - ai[arow]; 4808 aj = a->j + ai[arow]; 4809 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4810 bnzi += nlnk; 4811 /* add received col data into lnk */ 4812 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4813 if (i == *nextrow[k]) { /* i-th row */ 4814 anzi = *(nextai[k]+1) - *nextai[k]; 4815 aj = buf_rj[k] + *nextai[k]; 4816 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4817 bnzi += nlnk; 4818 nextrow[k]++; nextai[k]++; 4819 } 4820 } 4821 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4822 4823 /* if free space is not available, make more free space */ 4824 if (current_space->local_remaining<bnzi) { 4825 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4826 nspacedouble++; 4827 } 4828 /* copy data into free space, then initialize lnk */ 4829 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4830 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4831 4832 current_space->array += bnzi; 4833 current_space->local_used += bnzi; 4834 current_space->local_remaining -= bnzi; 4835 4836 bi[i+1] = bi[i] + bnzi; 4837 } 4838 4839 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4840 4841 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4842 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4843 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4844 4845 /* create symbolic parallel matrix B_mpi */ 4846 /*---------------------------------------*/ 4847 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4848 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4849 if (n==PETSC_DECIDE) { 4850 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4851 } else { 4852 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4853 } 4854 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4855 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4856 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4857 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4858 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4859 4860 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4861 B_mpi->assembled = PETSC_FALSE; 4862 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4863 merge->bi = bi; 4864 merge->bj = bj; 4865 merge->buf_ri = buf_ri; 4866 merge->buf_rj = buf_rj; 4867 merge->coi = NULL; 4868 merge->coj = NULL; 4869 merge->owners_co = NULL; 4870 4871 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4872 4873 /* attach the supporting struct to B_mpi for reuse */ 4874 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4875 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4876 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4877 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4878 *mpimat = B_mpi; 4879 4880 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4881 PetscFunctionReturn(0); 4882 } 4883 4884 /*@C 4885 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4886 matrices from each processor 4887 4888 Collective 4889 4890 Input Parameters: 4891 + comm - the communicator the parallel matrix will live on 4892 . seqmat - the input sequential matrix 4893 .
m - number of local rows (or PETSC_DECIDE) 4894 . n - number of local columns (or PETSC_DECIDE) 4895 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4896 4897 Output Parameter: 4898 . mpimat - the parallel matrix generated 4899 4900 Level: advanced 4901 4902 Notes: 4903 The dimensions of the sequential matrix in each processor MUST be the same. 4904 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4905 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4906 @*/ 4907 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4908 { 4909 PetscErrorCode ierr; 4910 PetscMPIInt size; 4911 4912 PetscFunctionBegin; 4913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4914 if (size == 1) { 4915 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4916 if (scall == MAT_INITIAL_MATRIX) { 4917 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4918 } else { 4919 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4920 } 4921 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4922 PetscFunctionReturn(0); 4923 } 4924 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4925 if (scall == MAT_INITIAL_MATRIX) { 4926 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4927 } 4928 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4929 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4930 PetscFunctionReturn(0); 4931 } 4932 4933 /*@ 4934 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4935 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4936 with MatGetSize() 4937 4938 Not Collective 4939 4940 Input Parameters: 4941 + A - the matrix 4942 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4943 4944 Output Parameter: 4945 . A_loc - the local sequential matrix generated 4946 4947 Level: developer 4948 4949 Notes: 4950 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4951 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4952 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4953 modify the values of the returned A_loc. 
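    An illustrative calling sequence (a sketch only; it assumes the nonzero pattern of A is unchanged
    between the two calls, so the second call simply refreshes the values, per the Notes above):
.vb
      Mat A_loc;

      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve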
4954 4955 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4956 4957 @*/ 4958 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4959 { 4960 PetscErrorCode ierr; 4961 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4962 Mat_SeqAIJ *mat,*a,*b; 4963 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4964 MatScalar *aa,*ba,*cam; 4965 PetscScalar *ca; 4966 PetscMPIInt size; 4967 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4968 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4969 PetscBool match; 4970 4971 PetscFunctionBegin; 4972 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4973 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4974 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4975 if (size == 1) { 4976 if (scall == MAT_INITIAL_MATRIX) { 4977 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4978 *A_loc = mpimat->A; 4979 } else if (scall == MAT_REUSE_MATRIX) { 4980 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4981 } 4982 PetscFunctionReturn(0); 4983 } 4984 4985 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4986 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4987 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4988 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4989 aa = a->a; ba = b->a; 4990 if (scall == MAT_INITIAL_MATRIX) { 4991 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4992 ci[0] = 0; 4993 for (i=0; i<am; i++) { 4994 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4995 } 4996 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4997 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4998 k = 0; 4999 for (i=0; i<am; i++) { 5000 ncols_o = bi[i+1] - bi[i]; 5001 ncols_d = ai[i+1] - ai[i]; 5002 /* off-diagonal portion of A */ 5003 for (jo=0; jo<ncols_o; jo++) { 5004 col = cmap[*bj]; 5005 if (col >= cstart) break; 5006 cj[k] = col; bj++; 5007 ca[k++] = *ba++; 5008 } 5009 /* diagonal portion of A */ 5010 for (j=0; j<ncols_d; j++) { 5011 cj[k] = cstart + *aj++; 5012 ca[k++] = *aa++; 5013 } 5014 /* off-diagonal portion of A */ 5015 for (j=jo; j<ncols_o; j++) { 5016 cj[k] = cmap[*bj++]; 5017 ca[k++] = *ba++; 5018 } 5019 } 5020 /* put together the new matrix */ 5021 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5022 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5023 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5024 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5025 mat->free_a = PETSC_TRUE; 5026 mat->free_ij = PETSC_TRUE; 5027 mat->nonew = 0; 5028 } else if (scall == MAT_REUSE_MATRIX) { 5029 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5030 ci = mat->i; cj = mat->j; cam = mat->a; 5031 for (i=0; i<am; i++) { 5032 /* off-diagonal portion of A */ 5033 ncols_o = bi[i+1] - bi[i]; 5034 for (jo=0; jo<ncols_o; jo++) { 5035 col = cmap[*bj]; 5036 if (col >= cstart) break; 5037 *cam++ = *ba++; bj++; 5038 } 5039 /* diagonal portion of A */ 5040 ncols_d = ai[i+1] - ai[i]; 5041 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5042 /* off-diagonal portion of A */ 5043 for (j=jo; j<ncols_o; j++) { 5044 *cam++ = *ba++; bj++; 5045 } 5046 } 5047 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5048 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5049 PetscFunctionReturn(0); 5050 } 5051 5052 /*@C 5053 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5054 5055 Not Collective 5056 5057 Input Parameters: 5058 + A - the matrix 5059 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5060 - row, col - index sets of rows and columns to extract (or NULL) 5061 5062 Output Parameter: 5063 . A_loc - the local sequential matrix generated 5064 5065 Level: developer 5066 5067 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5068 5069 @*/ 5070 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5071 { 5072 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5073 PetscErrorCode ierr; 5074 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5075 IS isrowa,iscola; 5076 Mat *aloc; 5077 PetscBool match; 5078 5079 PetscFunctionBegin; 5080 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5081 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5082 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5083 if (!row) { 5084 start = A->rmap->rstart; end = A->rmap->rend; 5085 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5086 } else { 5087 isrowa = *row; 5088 } 5089 if (!col) { 5090 start = A->cmap->rstart; 5091 cmap = a->garray; 5092 nzA = a->A->cmap->n; 5093 nzB = a->B->cmap->n; 5094 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5095 ncols = 0; 5096 for (i=0; i<nzB; i++) { 5097 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5098 else break; 5099 } 5100 imark = i; 5101 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5102 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5103 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5104 } else { 5105 iscola = *col; 5106 } 5107 if (scall != MAT_INITIAL_MATRIX) { 5108 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5109 aloc[0] = *A_loc; 5110 } 5111 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5112 if (!col) { /* attach global id of condensed columns */ 5113 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5114 } 5115 *A_loc = aloc[0]; 5116 ierr = PetscFree(aloc);CHKERRQ(ierr); 5117 if (!row) { 5118 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5119 } 5120 if (!col) { 5121 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5122 } 5123 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5124 PetscFunctionReturn(0); 5125 } 5126 5127 /* 5128 * Destroy a mat that may be 
composed with PetscSF communication objects. 5129 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5130 * */ 5131 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5132 { 5133 PetscSF sf,osf; 5134 IS map; 5135 PetscErrorCode ierr; 5136 5137 PetscFunctionBegin; 5138 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5139 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5140 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5141 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5142 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5143 ierr = ISDestroy(&map);CHKERRQ(ierr); 5144 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5145 PetscFunctionReturn(0); 5146 } 5147 5148 /* 5149 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5150 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5151 * on a global size. 5152 * */ 5153 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5154 { 5155 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5156 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5157 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5158 PetscMPIInt owner; 5159 PetscSFNode *iremote,*oiremote; 5160 const PetscInt *lrowindices; 5161 PetscErrorCode ierr; 5162 PetscSF sf,osf; 5163 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5164 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5165 MPI_Comm comm; 5166 ISLocalToGlobalMapping mapping; 5167 5168 PetscFunctionBegin; 5169 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5170 /* plocalsize is the number of roots 5171 * nrows is the number of leaves 5172 * */ 5173 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5174 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5175 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5176 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5177 for (i=0;i<nrows;i++) { 5178 /* Find a remote index and an owner for a row 5179 * The row could be local or remote 5180 * */ 5181 owner = 0; 5182 lidx = 0; 5183 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5184 iremote[i].index = lidx; 5185 iremote[i].rank = owner; 5186 } 5187 /* Create SF to communicate how many nonzero columns for each row */ 5188 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5189 /* SF will figure out the number of nonzero colunms for each row, and their 5190 * offsets 5191 * */ 5192 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5193 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5194 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5195 5196 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5197 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5198 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5199 roffsets[0] = 0; 5200 roffsets[1] = 0; 5201 for (i=0;i<plocalsize;i++) { 5202 /* diag */ 5203 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5204 /* off diag */ 5205 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5206 /* compute offsets so that we relative location for each row */ 5207 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5208 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5209 } 5210 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5211 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5212 /* 'r' 
means root, and 'l' means leaf */ 5213 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5214 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5215 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5216 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5217 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5218 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5219 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5220 dntotalcols = 0; 5221 ontotalcols = 0; 5222 ncol = 0; 5223 for (i=0;i<nrows;i++) { 5224 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5225 ncol = PetscMax(pnnz[i],ncol); 5226 /* diag */ 5227 dntotalcols += nlcols[i*2+0]; 5228 /* off diag */ 5229 ontotalcols += nlcols[i*2+1]; 5230 } 5231 /* We do not need to figure the right number of columns 5232 * since all the calculations will be done by going through the raw data 5233 * */ 5234 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5235 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5236 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5237 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5238 /* diag */ 5239 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5240 /* off diag */ 5241 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5242 /* diag */ 5243 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5244 /* off diag */ 5245 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5246 dntotalcols = 0; 5247 ontotalcols = 0; 5248 ntotalcols = 0; 5249 for (i=0;i<nrows;i++) { 5250 owner = 0; 5251 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5252 /* Set iremote for diag matrix */ 5253 for (j=0;j<nlcols[i*2+0];j++) { 5254 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5255 iremote[dntotalcols].rank = owner; 5256 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5257 ilocal[dntotalcols++] = ntotalcols++; 5258 } 5259 /* off diag */ 5260 for (j=0;j<nlcols[i*2+1];j++) { 5261 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5262 oiremote[ontotalcols].rank = owner; 5263 oilocal[ontotalcols++] = ntotalcols++; 5264 } 5265 } 5266 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5267 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5268 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5269 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5270 /* P serves as roots and P_oth is leaves 5271 * Diag matrix 5272 * */ 5273 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5274 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5275 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5276 5277 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5278 /* Off diag */ 5279 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5280 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5281 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5282 /* We operate on the matrix internal data for saving memory */ 5283 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5284 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5285 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5286 /* Convert to global indices for diag matrix */ 5287 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5288 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5289 /* We want P_oth store global indices */ 5290 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5291 /* Use memory scalable approach */ 5292 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5293 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5294 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5295 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5296 /* Convert back to local indices */ 5297 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5298 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5299 nout = 0; 5300 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5301 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5302 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5303 /* Exchange values */ 5304 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5305 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5306 /* Stop PETSc from shrinking memory */ 5307 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5308 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5309 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5310 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5311 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5312 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5313 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5314 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5315 PetscFunctionReturn(0); 5316 } 5317 5318 /* 5319 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5320 * This supports MPIAIJ and MAIJ 5321 * */ 5322 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5323 { 5324 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5325 Mat_SeqAIJ *p_oth; 5326 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5327 IS rows,map; 5328 PetscHMapI hamp; 5329 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5330 MPI_Comm comm; 5331 PetscSF sf,osf; 5332 PetscBool has; 5333 PetscErrorCode ierr; 5334 5335 PetscFunctionBegin; 5336 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5337 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5338 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5339 * and then create a submatrix (that often is an overlapping matrix) 5340 * */ 5341 if (reuse==MAT_INITIAL_MATRIX) { 5342 /* Use a hash table to figure out unique keys */ 5343 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5344 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5345 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5346 count = 0; 5347 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5348 for (i=0;i<a->B->cmap->n;i++) { 5349 key = a->garray[i]/dof; 5350 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5351 if (!has) { 5352 mapping[i] = count; 5353 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5354 } else { 5355 /* Current 'i' has the same value the previous step */ 5356 mapping[i] = count-1; 5357 } 5358 } 5359 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5360 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5361 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5362 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5363 off = 0; 5364 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5365 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5366 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5367 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5368 /* In case the matrix was already created but the user wants to recreate it */ 5369 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5370 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5371 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5372 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5373 } else if (reuse==MAT_REUSE_MATRIX) { 5374 /* If the matrix was already created, we simply update values using the SF objects 5375 * that were attached to the matrix earlier. 5376 * */ 5377 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5378 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5379 if (!sf || !osf) { 5380 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5381 } 5382 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5383 /* Update values in place */ 5384 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5385 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5386 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5387 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5388 } else { 5389 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5390 } 5391 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5392 PetscFunctionReturn(0); 5393 } 5394 5395 /*@C 5396 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5397 5398 Collective on Mat 5399 5400 Input Parameters: 5401 + A,B - the matrices in mpiaij format 5402 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5403 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5404 5405 Output Parameter: 5406 + rowb, colb - index sets of rows and columns of B to extract 5407 - B_seq - the sequential matrix generated 5408 5409 Level: developer 5410 5411 @*/ 5412 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5413 { 5414 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5415 PetscErrorCode ierr; 5416 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5417 IS isrowb,iscolb; 5418 Mat *bseq=NULL; 5419 5420 PetscFunctionBegin; 5421 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5422 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5423 } 5424 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5425 5426 if (scall == MAT_INITIAL_MATRIX) { 5427 start = A->cmap->rstart; 5428 cmap = a->garray; 5429 nzA = a->A->cmap->n; 5430 nzB = a->B->cmap->n; 5431 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5432 ncols = 0; 5433 for (i=0; i<nzB; i++) { /* row < local row index */ 5434 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5435 else break; 5436 } 5437 imark = i; 5438 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5439 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5440 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5441 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5442 } else { 5443 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5444 isrowb = *rowb; iscolb = *colb; 5445 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5446 bseq[0] = *B_seq; 5447 } 5448 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5449 *B_seq = bseq[0]; 5450 ierr = PetscFree(bseq);CHKERRQ(ierr); 5451 if (!rowb) { 5452 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5453 } else { 5454 *rowb = isrowb; 5455 } 5456 if (!colb) { 5457 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5458 } else { 5459 *colb = iscolb; 5460 } 5461 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5462 PetscFunctionReturn(0); 5463 } 5464 5465 /* 5466 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5467 of the OFF-DIAGONAL portion of local A 5468 5469 Collective on Mat 5470 5471 Input Parameters: 5472 + A,B - the matrices in mpiaij format 5473 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5474 5475 Output Parameter: 5476 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5477 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5478 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5479 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5480 5481 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5482 for this matrix. This is not desirable.. 
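   A hedged sketch of the intended call pattern (the variable names below are illustrative only;
   the saved startsj/bufa buffers allow a later MAT_REUSE_MATRIX call to skip the symbolic setup):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth = NULL;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
         (use B_oth; later, after only the numerical values of B have changed:)
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);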
5483 5484 Level: developer 5485 5486 */ 5487 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5488 { 5489 PetscErrorCode ierr; 5490 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5491 Mat_SeqAIJ *b_oth; 5492 VecScatter ctx; 5493 MPI_Comm comm; 5494 const PetscMPIInt *rprocs,*sprocs; 5495 const PetscInt *srow,*rstarts,*sstarts; 5496 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5497 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5498 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5499 MPI_Request *rwaits = NULL,*swaits = NULL; 5500 MPI_Status rstatus; 5501 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5502 5503 PetscFunctionBegin; 5504 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5506 5507 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5508 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5509 } 5510 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5511 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5512 5513 if (size == 1) { 5514 startsj_s = NULL; 5515 bufa_ptr = NULL; 5516 *B_oth = NULL; 5517 PetscFunctionReturn(0); 5518 } 5519 5520 ctx = a->Mvctx; 5521 tag = ((PetscObject)ctx)->tag; 5522 5523 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5524 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5525 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5526 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5527 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5528 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5529 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5530 5531 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5532 if (scall == MAT_INITIAL_MATRIX) { 5533 /* i-array */ 5534 /*---------*/ 5535 /* post receives */ 5536 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5537 for (i=0; i<nrecvs; i++) { 5538 rowlen = rvalues + rstarts[i]*rbs; 5539 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5540 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5541 } 5542 5543 /* pack the outgoing message */ 5544 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5545 5546 sstartsj[0] = 0; 5547 rstartsj[0] = 0; 5548 len = 0; /* total length of j or a array to be sent */ 5549 if (nsends) { 5550 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5551 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5552 } 5553 for (i=0; i<nsends; i++) { 5554 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5555 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5556 for (j=0; j<nrows; j++) { 5557 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5558 for (l=0; l<sbs; l++) { 5559 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5560 5561 rowlen[j*sbs+l] = ncols; 5562 5563 len += ncols; 5564 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5565 } 5566 k++; 5567 } 5568 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5569 5570 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5571 } 5572 /* recvs and sends of i-array are completed */ 5573 i = nrecvs; 5574 while (i--) { 5575 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5576 } 5577 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5578 ierr = PetscFree(svalues);CHKERRQ(ierr); 5579 5580 /* allocate buffers for sending j and a arrays */ 5581 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5582 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5583 5584 /* create i-array of B_oth */ 5585 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5586 5587 b_othi[0] = 0; 5588 len = 0; /* total length of j or a array to be received */ 5589 k = 0; 5590 for (i=0; i<nrecvs; i++) { 5591 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5592 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5593 for (j=0; j<nrows; j++) { 5594 b_othi[k+1] = b_othi[k] + rowlen[j]; 5595 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5596 k++; 5597 } 5598 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5599 } 5600 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5601 5602 /* allocate space for j and a arrrays of B_oth */ 5603 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5604 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5605 5606 /* j-array */ 5607 /*---------*/ 5608 /* post receives of j-array */ 5609 for (i=0; i<nrecvs; i++) { 5610 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5611 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5612 } 5613 5614 /* pack the outgoing message j-array */ 5615 if (nsends) k = sstarts[0]; 5616 for (i=0; i<nsends; i++) { 5617 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5618 bufJ = bufj+sstartsj[i]; 5619 for (j=0; j<nrows; j++) { 5620 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5621 for (ll=0; ll<sbs; ll++) { 5622 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5623 for (l=0; l<ncols; l++) { 5624 *bufJ++ = cols[l]; 5625 } 5626 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5627 } 5628 } 5629 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5630 } 5631 5632 /* recvs and sends of j-array are completed */ 5633 i = nrecvs; 5634 while (i--) { 5635 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5636 } 5637 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5638 } else if (scall == MAT_REUSE_MATRIX) { 5639 sstartsj = *startsj_s; 5640 rstartsj = *startsj_r; 5641 bufa = *bufa_ptr; 5642 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5643 b_otha = b_oth->a; 5644 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5645 5646 /* a-array */ 5647 /*---------*/ 5648 /* post receives of a-array */ 5649 for (i=0; i<nrecvs; i++) { 5650 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5651 ierr = 
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5652 } 5653 5654 /* pack the outgoing message a-array */ 5655 if (nsends) k = sstarts[0]; 5656 for (i=0; i<nsends; i++) { 5657 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5658 bufA = bufa+sstartsj[i]; 5659 for (j=0; j<nrows; j++) { 5660 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5661 for (ll=0; ll<sbs; ll++) { 5662 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5663 for (l=0; l<ncols; l++) { 5664 *bufA++ = vals[l]; 5665 } 5666 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5667 } 5668 } 5669 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5670 } 5671 /* recvs and sends of a-array are completed */ 5672 i = nrecvs; 5673 while (i--) { 5674 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5675 } 5676 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5677 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5678 5679 if (scall == MAT_INITIAL_MATRIX) { 5680 /* put together the new matrix */ 5681 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5682 5683 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5684 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5685 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5686 b_oth->free_a = PETSC_TRUE; 5687 b_oth->free_ij = PETSC_TRUE; 5688 b_oth->nonew = 0; 5689 5690 ierr = PetscFree(bufj);CHKERRQ(ierr); 5691 if (!startsj_s || !bufa_ptr) { 5692 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5693 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5694 } else { 5695 *startsj_s = sstartsj; 5696 *startsj_r = rstartsj; 5697 *bufa_ptr = bufa; 5698 } 5699 } 5700 5701 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5702 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5703 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5704 PetscFunctionReturn(0); 5705 } 5706 5707 /*@C 5708 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5709 5710 Not Collective 5711 5712 Input Parameters: 5713 . A - The matrix in mpiaij format 5714 5715 Output Parameter: 5716 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5717 . 

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing A*B directly is untenable

               n            p           p
        (          )  (          )  (          )
      m (    A     ) * n (   B    ) = m (   C    )
        (          )  (          )  (          )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}
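
/*
   Usage sketch (illustrative only, not compiled here): the dense*AIJ product implemented by the two
   routines above is normally reached through the generic product interface, e.g. with A of type
   MATMPIDENSE and B of type MATMPIAIJ:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/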

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/
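
/*
   Usage sketch (illustrative only, not compiled here) of the MatSetValues() behaviour noted above:
   passing NULL for the values inserts explicit zeros at the given locations, which is a convenient
   way to preset a nonzero pattern.

     PetscInt row = 0, cols[] = {0,1,2};
     ierr = MatSetValues(A,1,&row,3,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/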

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
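
/*
   Usage sketch (illustrative only, not compiled here): one locally owned row per process with a single
   entry in the "diagonal" block and an empty "off-diagonal" block. The arrays below are hypothetical;
   as noted above they are not copied, so they may only be freed after the matrix has been destroyed.

     PetscInt    di[2] = {0,1}, dj[1] = {0};    one entry in local row 0, local column 0
     PetscScalar da[1] = {2.0};
     PetscInt    oi[2] = {0,0}, oj[1];          empty off-diagonal part (oj/oa never read)
     PetscScalar oa[1];
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
*/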

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;  /* refresh ba before recomputing ap2; B's value array changed in MatDisAssemble_MPIAIJ() */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}