#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashseti.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order N integer array, but access is fast).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU) aij->A->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU && aij->B->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) aij->B->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

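/*
   Illustrative sketch only (not part of this file's implementation): typical user-level assembly of a
   MATAIJ matrix, which ends up exercising MatSetValues_MPIAIJ(), MatAssemblyBegin_MPIAIJ() and
   MatAssemblyEnd_MPIAIJ() defined above. Calling both MatSeqAIJSetPreallocation() and
   MatMPIAIJSetPreallocation() follows the recommendation in the MATAIJ man page at the top of this
   file. The problem size and the variable names (n, Amat, rstart, rend, cols, vals) are hypothetical.

     Mat         Amat;
     PetscInt    n = 100, row, rstart, rend, ncols, cols[3];
     PetscScalar vals[3];

     ierr = MatCreate(PETSC_COMM_WORLD,&Amat);CHKERRQ(ierr);
     ierr = MatSetSizes(Amat,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(Amat,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(Amat,3,NULL);CHKERRQ(ierr);       \* used on one process *\
     ierr = MatMPIAIJSetPreallocation(Amat,3,NULL,2,NULL);CHKERRQ(ierr); \* used on several processes *\
     ierr = MatGetOwnershipRange(Amat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {             \* insert a 1-D Laplacian stencil row by row *\
       ncols = 0;
       if (row > 0)   {cols[ncols] = row-1; vals[ncols++] = -1.0;}
       cols[ncols] = row; vals[ncols++] = 2.0;
       if (row < n-1) {cols[ncols] = row+1; vals[ncols++] = -1.0;}
       ierr = MatSetValues(Amat,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(Amat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(Amat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
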
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this process needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
PetscMin(nmin,nz[i]); 1425 navg += nz[i]; 1426 } 1427 ierr = PetscFree(nz);CHKERRQ(ierr); 1428 navg = navg/size; 1429 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1433 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1434 MatInfo info; 1435 PetscBool inodes; 1436 1437 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1438 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1439 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1440 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1441 if (!inodes) { 1442 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1443 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1444 } else { 1445 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1446 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1447 } 1448 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1449 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1450 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1451 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1452 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1454 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1455 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1458 PetscInt inodecount,inodelimit,*inodes; 1459 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1460 if (inodes) { 1461 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1462 } else { 1463 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1464 } 1465 PetscFunctionReturn(0); 1466 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1467 PetscFunctionReturn(0); 1468 } 1469 } else if (isbinary) { 1470 if (size == 1) { 1471 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1472 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1473 } else { 1474 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1475 } 1476 PetscFunctionReturn(0); 1477 } else if (iascii && size == 1) { 1478 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1479 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } else if (isdraw) { 1482 PetscDraw draw; 1483 PetscBool isnull; 1484 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1485 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1486 if (isnull) PetscFunctionReturn(0); 1487 } 1488 1489 { /* assemble the entire matrix onto first processor */ 1490 Mat A = NULL, Av; 1491 IS isrow,iscol; 1492 1493 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1494 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1495 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1496 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1497 /* The commented code uses MatCreateSubMatrices instead */ 1498 /* 1499 Mat *AA, A = NULL, Av; 1500 IS isrow,iscol; 1501 1502 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1503 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1504 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1505 if (!rank) { 1506 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1507 A = AA[0]; 1508 Av = AA[0]; 1509 } 1510 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1511 */ 1512 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1513 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1514 /* 1515 Everyone has to call to draw the matrix since the graphics waits are 1516 synchronized across all processors that share the PetscDraw object 1517 */ 1518 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1519 if (!rank) { 1520 if (((PetscObject)mat)->name) { 1521 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1522 } 1523 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1524 } 1525 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1526 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1527 ierr = MatDestroy(&A);CHKERRQ(ierr); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1533 { 1534 PetscErrorCode ierr; 1535 PetscBool iascii,isdraw,issocket,isbinary; 1536 1537 PetscFunctionBegin; 1538 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1539 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1540 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1541 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1542 if (iascii || isdraw || isbinary || issocket) { 1543 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1544 } 1545 PetscFunctionReturn(0); 1546 } 1547 1548 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1549 { 1550 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1551 PetscErrorCode ierr; 1552 Vec bb1 = 0; 1553 PetscBool hasop; 1554 1555 PetscFunctionBegin; 1556 if (flag == SOR_APPLY_UPPER) { 1557 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1562 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1563 } 1564 1565 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1566 if (flag & SOR_ZERO_INITIAL_GUESS) { 1567 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1568 its--; 1569 } 1570 1571 while (its--) { 1572 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 1575 /* update rhs: bb1 = bb - B*x */ 1576 ierr = 
VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1577 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1578 1579 /* local sweep */ 1580 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1581 } 1582 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1583 if (flag & SOR_ZERO_INITIAL_GUESS) { 1584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1585 its--; 1586 } 1587 while (its--) { 1588 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 1591 /* update rhs: bb1 = bb - B*x */ 1592 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1593 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1594 1595 /* local sweep */ 1596 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1597 } 1598 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1599 if (flag & SOR_ZERO_INITIAL_GUESS) { 1600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1601 its--; 1602 } 1603 while (its--) { 1604 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 1607 /* update rhs: bb1 = bb - B*x */ 1608 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1609 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1610 1611 /* local sweep */ 1612 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1613 } 1614 } else if (flag & SOR_EISENSTAT) { 1615 Vec xx1; 1616 1617 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1618 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1619 1620 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 if (!mat->diag) { 1623 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1624 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1625 } 1626 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1627 if (hasop) { 1628 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1629 } else { 1630 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1631 } 1632 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1633 1634 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1635 1636 /* local sweep */ 1637 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1638 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1639 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1640 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1641 1642 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1643 1644 matin->factorerrortype = mat->A->factorerrortype; 1645 PetscFunctionReturn(0); 1646 } 1647 1648 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1649 { 1650 Mat aA,aB,Aperm; 1651 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1652 PetscScalar *aa,*ba; 1653 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1654 PetscSF rowsf,sf; 1655 IS parcolp = NULL; 1656 PetscBool done; 1657 PetscErrorCode ierr; 
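  /*
     Overview: the row and column permutations are inverted with PetscSF reductions so that each
     process learns the new global index of the rows and columns it owns, the diagonal/off-diagonal
     preallocation of the permuted matrix is computed from those destinations, and the values are
     then inserted with MatSetValues() in batches.

     A minimal usage sketch, not part of this routine and only illustrative: assuming A is an
     assembled MATMPIAIJ matrix, a caller could permute it with

       IS  rowp,colp;
       Mat Aperm;
       ierr = ISCreateStride(PetscObjectComm((PetscObject)A),A->rmap->n,A->rmap->rstart,1,&rowp);CHKERRQ(ierr);   (identity row permutation)
       ierr = ISCreateStride(PetscObjectComm((PetscObject)A),A->cmap->n,A->cmap->rstart,1,&colp);CHKERRQ(ierr);   (identity column permutation)
       ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
       ierr = ISDestroy(&rowp);CHKERRQ(ierr);
       ierr = ISDestroy(&colp);CHKERRQ(ierr);
  */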
1658 1659 PetscFunctionBegin; 1660 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1661 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1664 1665 /* Invert row permutation to find out where my rows should go */ 1666 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1667 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1668 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1669 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1670 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1671 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1672 1673 /* Invert column permutation to find out where my columns should go */ 1674 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1675 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1676 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1677 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1678 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1679 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1680 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1681 1682 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1683 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1684 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1685 1686 /* Find out where my gcols should go */ 1687 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1688 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1689 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1690 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1691 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1692 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1693 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1694 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1695 1696 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1697 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1698 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1699 for (i=0; i<m; i++) { 1700 PetscInt row = rdest[i],rowner; 1701 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1702 for (j=ai[i]; j<ai[i+1]; j++) { 1703 PetscInt cowner,col = cdest[aj[j]]; 1704 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1705 if (rowner == cowner) dnnz[i]++; 1706 else onnz[i]++; 1707 } 1708 for (j=bi[i]; j<bi[i+1]; j++) { 1709 PetscInt cowner,col = gcdest[bj[j]]; 1710 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1711 if (rowner == cowner) dnnz[i]++; 1712 else onnz[i]++; 1713 } 1714 } 1715 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1716 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1717 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1718 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1719 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1720 1721 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1722 ierr = 
MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1723 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1724 for (i=0; i<m; i++) { 1725 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1726 PetscInt j0,rowlen; 1727 rowlen = ai[i+1] - ai[i]; 1728 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1729 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1730 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1731 } 1732 rowlen = bi[i+1] - bi[i]; 1733 for (j0=j=0; j<rowlen; j0=j) { 1734 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1735 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1736 } 1737 } 1738 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1739 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1740 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1741 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1742 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1743 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1744 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1745 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1746 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1747 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1748 *B = Aperm; 1749 PetscFunctionReturn(0); 1750 } 1751 1752 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1753 { 1754 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1759 if (ghosts) *ghosts = aij->garray; 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1764 { 1765 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1766 Mat A = mat->A,B = mat->B; 1767 PetscErrorCode ierr; 1768 PetscReal isend[5],irecv[5]; 1769 1770 PetscFunctionBegin; 1771 info->block_size = 1.0; 1772 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1773 1774 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1775 isend[3] = info->memory; isend[4] = info->mallocs; 1776 1777 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1778 1779 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1780 isend[3] += info->memory; isend[4] += info->mallocs; 1781 if (flag == MAT_LOCAL) { 1782 info->nz_used = isend[0]; 1783 info->nz_allocated = isend[1]; 1784 info->nz_unneeded = isend[2]; 1785 info->memory = isend[3]; 1786 info->mallocs = isend[4]; 1787 } else if (flag == MAT_GLOBAL_MAX) { 1788 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1789 1790 info->nz_used = irecv[0]; 1791 info->nz_allocated = irecv[1]; 1792 info->nz_unneeded = irecv[2]; 1793 info->memory = irecv[3]; 1794 info->mallocs = irecv[4]; 1795 } else if (flag == MAT_GLOBAL_SUM) { 1796 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1797 1798 info->nz_used = irecv[0]; 1799 info->nz_allocated = irecv[1]; 1800 info->nz_unneeded = irecv[2]; 1801 info->memory = irecv[3]; 1802 info->mallocs = irecv[4]; 1803 } 1804 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1805 info->fill_ratio_needed = 0; 1806 info->factor_mallocs = 0; 1807 
PetscFunctionReturn(0); 1808 } 1809 1810 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1811 { 1812 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1813 PetscErrorCode ierr; 1814 1815 PetscFunctionBegin; 1816 switch (op) { 1817 case MAT_NEW_NONZERO_LOCATIONS: 1818 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1819 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1820 case MAT_KEEP_NONZERO_PATTERN: 1821 case MAT_NEW_NONZERO_LOCATION_ERR: 1822 case MAT_USE_INODES: 1823 case MAT_IGNORE_ZERO_ENTRIES: 1824 MatCheckPreallocated(A,1); 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1827 break; 1828 case MAT_ROW_ORIENTED: 1829 MatCheckPreallocated(A,1); 1830 a->roworiented = flg; 1831 1832 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1833 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1834 break; 1835 case MAT_NEW_DIAGONALS: 1836 case MAT_SORTED_FULL: 1837 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1838 break; 1839 case MAT_IGNORE_OFF_PROC_ENTRIES: 1840 a->donotstash = flg; 1841 break; 1842 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1843 case MAT_SPD: 1844 case MAT_SYMMETRIC: 1845 case MAT_STRUCTURALLY_SYMMETRIC: 1846 case MAT_HERMITIAN: 1847 case MAT_SYMMETRY_ETERNAL: 1848 break; 1849 case MAT_SUBMAT_SINGLEIS: 1850 A->submat_singleis = flg; 1851 break; 1852 case MAT_STRUCTURE_ONLY: 1853 /* The option is handled directly by MatSetOption() */ 1854 break; 1855 default: 1856 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1862 { 1863 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1864 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1865 PetscErrorCode ierr; 1866 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1867 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1868 PetscInt *cmap,*idx_p; 1869 1870 PetscFunctionBegin; 1871 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1872 mat->getrowactive = PETSC_TRUE; 1873 1874 if (!mat->rowvalues && (idx || v)) { 1875 /* 1876 allocate enough space to hold information from the longest row. 
1877 */ 1878 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1879 PetscInt max = 1,tmp; 1880 for (i=0; i<matin->rmap->n; i++) { 1881 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1882 if (max < tmp) max = tmp; 1883 } 1884 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1885 } 1886 1887 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1888 lrow = row - rstart; 1889 1890 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1891 if (!v) {pvA = 0; pvB = 0;} 1892 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1893 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1894 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1895 nztot = nzA + nzB; 1896 1897 cmap = mat->garray; 1898 if (v || idx) { 1899 if (nztot) { 1900 /* Sort by increasing column numbers, assuming A and B already sorted */ 1901 PetscInt imark = -1; 1902 if (v) { 1903 *v = v_p = mat->rowvalues; 1904 for (i=0; i<nzB; i++) { 1905 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1906 else break; 1907 } 1908 imark = i; 1909 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1910 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1911 } 1912 if (idx) { 1913 *idx = idx_p = mat->rowindices; 1914 if (imark > -1) { 1915 for (i=0; i<imark; i++) { 1916 idx_p[i] = cmap[cworkB[i]]; 1917 } 1918 } else { 1919 for (i=0; i<nzB; i++) { 1920 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1921 else break; 1922 } 1923 imark = i; 1924 } 1925 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1926 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1927 } 1928 } else { 1929 if (idx) *idx = 0; 1930 if (v) *v = 0; 1931 } 1932 } 1933 *nz = nztot; 1934 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1935 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1940 { 1941 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1942 1943 PetscFunctionBegin; 1944 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1945 aij->getrowactive = PETSC_FALSE; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1950 { 1951 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1952 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1953 PetscErrorCode ierr; 1954 PetscInt i,j,cstart = mat->cmap->rstart; 1955 PetscReal sum = 0.0; 1956 MatScalar *v; 1957 1958 PetscFunctionBegin; 1959 if (aij->size == 1) { 1960 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1961 } else { 1962 if (type == NORM_FROBENIUS) { 1963 v = amat->a; 1964 for (i=0; i<amat->nz; i++) { 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 } 1967 v = bmat->a; 1968 for (i=0; i<bmat->nz; i++) { 1969 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1970 } 1971 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1972 *norm = PetscSqrtReal(*norm); 1973 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1974 } else if (type == NORM_1) { /* max column norm */ 1975 PetscReal *tmp,*tmp2; 1976 PetscInt *jj,*garray = aij->garray; 1977 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1978 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1979 *norm = 0.0; 
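      /* NORM_1 is the maximum absolute column sum: ||A||_1 = max_j sum_i |a_ij|.
         Each process accumulates |a_ij| for its local diagonal (amat) and off-diagonal (bmat)
         blocks into a work array of global column length; diagonal-block column indices are
         offset by cstart and off-diagonal (compressed) indices are mapped through garray.
         The partial column sums are then combined with MPIU_Allreduce() and the largest entry
         is the norm. */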
1980 v = amat->a; jj = amat->j; 1981 for (j=0; j<amat->nz; j++) { 1982 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1983 } 1984 v = bmat->a; jj = bmat->j; 1985 for (j=0; j<bmat->nz; j++) { 1986 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1987 } 1988 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1989 for (j=0; j<mat->cmap->N; j++) { 1990 if (tmp2[j] > *norm) *norm = tmp2[j]; 1991 } 1992 ierr = PetscFree(tmp);CHKERRQ(ierr); 1993 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1994 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1995 } else if (type == NORM_INFINITY) { /* max row norm */ 1996 PetscReal ntemp = 0.0; 1997 for (j=0; j<aij->A->rmap->n; j++) { 1998 v = amat->a + amat->i[j]; 1999 sum = 0.0; 2000 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 v = bmat->a + bmat->i[j]; 2004 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2005 sum += PetscAbsScalar(*v); v++; 2006 } 2007 if (sum > ntemp) ntemp = sum; 2008 } 2009 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2010 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2011 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2017 { 2018 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2019 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2020 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2021 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2022 PetscErrorCode ierr; 2023 Mat B,A_diag,*B_diag; 2024 const MatScalar *array; 2025 2026 PetscFunctionBegin; 2027 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2028 ai = Aloc->i; aj = Aloc->j; 2029 bi = Bloc->i; bj = Bloc->j; 2030 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2031 PetscInt *d_nnz,*g_nnz,*o_nnz; 2032 PetscSFNode *oloc; 2033 PETSC_UNUSED PetscSF sf; 2034 2035 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2036 /* compute d_nnz for preallocation */ 2037 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2038 for (i=0; i<ai[ma]; i++) { 2039 d_nnz[aj[i]]++; 2040 } 2041 /* compute local off-diagonal contributions */ 2042 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2043 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2044 /* map those to global */ 2045 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2046 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2047 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2048 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2049 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2050 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2051 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2052 2053 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2054 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2055 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2056 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2057 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2058 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2059 } else { 2060 B = *matout; 2061 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2062 } 2063 2064 b = (Mat_MPIAIJ*)B->data; 2065 A_diag = a->A; 2066 B_diag = &b->A; 2067 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2068 A_diag_ncol = A_diag->cmap->N; 2069 B_diag_ilen = sub_B_diag->ilen; 2070 B_diag_i = sub_B_diag->i; 2071 2072 /* Set ilen for diagonal of B */ 2073 for (i=0; i<A_diag_ncol; i++) { 2074 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2075 } 2076 2077 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2078 very quickly (=without using MatSetValues), because all writes are local. */ 2079 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2080 2081 /* copy over the B part */ 2082 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2083 array = Bloc->a; 2084 row = A->rmap->rstart; 2085 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2086 cols_tmp = cols; 2087 for (i=0; i<mb; i++) { 2088 ncol = bi[i+1]-bi[i]; 2089 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2090 row++; 2091 array += ncol; cols_tmp += ncol; 2092 } 2093 ierr = PetscFree(cols);CHKERRQ(ierr); 2094 2095 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2097 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2098 *matout = B; 2099 } else { 2100 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2106 { 2107 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2108 Mat a = aij->A,b = aij->B; 2109 PetscErrorCode ierr; 2110 PetscInt s1,s2,s3; 2111 2112 PetscFunctionBegin; 2113 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2114 if (rr) { 2115 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2116 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2117 /* Overlap communication with computation. 
*/ 2118 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 } 2120 if (ll) { 2121 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2122 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2123 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2124 } 2125 /* scale the diagonal block */ 2126 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2127 2128 if (rr) { 2129 /* Do a scatter end and then right scale the off-diagonal block */ 2130 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2131 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2137 { 2138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2139 PetscErrorCode ierr; 2140 2141 PetscFunctionBegin; 2142 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2147 { 2148 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2149 Mat a,b,c,d; 2150 PetscBool flg; 2151 PetscErrorCode ierr; 2152 2153 PetscFunctionBegin; 2154 a = matA->A; b = matA->B; 2155 c = matB->A; d = matB->B; 2156 2157 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2158 if (flg) { 2159 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2160 } 2161 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2166 { 2167 PetscErrorCode ierr; 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2169 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2170 2171 PetscFunctionBegin; 2172 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2173 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2174 /* because of the column compression in the off-processor part of the matrix a->B, 2175 the number of columns in a->B and b->B may be different, hence we cannot call 2176 the MatCopy() directly on the two parts. If need be, we can provide a more 2177 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2178 then copying the submatrices */ 2179 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2180 } else { 2181 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2182 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2183 } 2184 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2189 { 2190 PetscErrorCode ierr; 2191 2192 PetscFunctionBegin; 2193 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2194 PetscFunctionReturn(0); 2195 } 2196 2197 /* 2198 Computes the number of nonzeros per row needed for preallocation when X and Y 2199 have different nonzero structure. 
2200 */ 2201 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2202 { 2203 PetscInt i,j,k,nzx,nzy; 2204 2205 PetscFunctionBegin; 2206 /* Set the number of nonzeros in the new matrix */ 2207 for (i=0; i<m; i++) { 2208 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2209 nzx = xi[i+1] - xi[i]; 2210 nzy = yi[i+1] - yi[i]; 2211 nnz[i] = 0; 2212 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2213 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2214 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2215 nnz[i]++; 2216 } 2217 for (; k<nzy; k++) nnz[i]++; 2218 } 2219 PetscFunctionReturn(0); 2220 } 2221 2222 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2223 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2224 { 2225 PetscErrorCode ierr; 2226 PetscInt m = Y->rmap->N; 2227 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2228 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2229 2230 PetscFunctionBegin; 2231 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2232 PetscFunctionReturn(0); 2233 } 2234 2235 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2239 PetscBLASInt bnz,one=1; 2240 Mat_SeqAIJ *x,*y; 2241 2242 PetscFunctionBegin; 2243 if (str == SAME_NONZERO_PATTERN) { 2244 PetscScalar alpha = a; 2245 x = (Mat_SeqAIJ*)xx->A->data; 2246 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2247 y = (Mat_SeqAIJ*)yy->A->data; 2248 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2249 x = (Mat_SeqAIJ*)xx->B->data; 2250 y = (Mat_SeqAIJ*)yy->B->data; 2251 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2252 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2253 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2254 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2255 will be updated */ 2256 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2257 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 2258 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 2259 } 2260 #endif 2261 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2262 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2263 } else { 2264 Mat B; 2265 PetscInt *nnz_d,*nnz_o; 2266 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2267 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2268 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2269 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2270 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2271 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2272 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2273 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2274 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2275 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2276 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2277 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2278 ierr = 
PetscFree(nnz_d);CHKERRQ(ierr); 2279 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2280 } 2281 PetscFunctionReturn(0); 2282 } 2283 2284 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2285 2286 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2287 { 2288 #if defined(PETSC_USE_COMPLEX) 2289 PetscErrorCode ierr; 2290 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2291 2292 PetscFunctionBegin; 2293 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2294 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2295 #else 2296 PetscFunctionBegin; 2297 #endif 2298 PetscFunctionReturn(0); 2299 } 2300 2301 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2302 { 2303 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2304 PetscErrorCode ierr; 2305 2306 PetscFunctionBegin; 2307 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2308 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2309 PetscFunctionReturn(0); 2310 } 2311 2312 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2313 { 2314 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2315 PetscErrorCode ierr; 2316 2317 PetscFunctionBegin; 2318 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2319 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2320 PetscFunctionReturn(0); 2321 } 2322 2323 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2324 { 2325 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2326 PetscErrorCode ierr; 2327 PetscInt i,*idxb = 0; 2328 PetscScalar *va,*vb; 2329 Vec vtmp; 2330 2331 PetscFunctionBegin; 2332 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2333 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2334 if (idx) { 2335 for (i=0; i<A->rmap->n; i++) { 2336 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2337 } 2338 } 2339 2340 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2341 if (idx) { 2342 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2343 } 2344 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2345 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2346 2347 for (i=0; i<A->rmap->n; i++) { 2348 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2349 va[i] = vb[i]; 2350 if (idx) idx[i] = a->garray[idxb[i]]; 2351 } 2352 } 2353 2354 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2355 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2356 ierr = PetscFree(idxb);CHKERRQ(ierr); 2357 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2362 { 2363 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2364 PetscErrorCode ierr; 2365 PetscInt i,*idxb = 0; 2366 PetscScalar *va,*vb; 2367 Vec vtmp; 2368 2369 PetscFunctionBegin; 2370 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2371 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2372 if (idx) { 2373 for (i=0; i<A->cmap->n; i++) { 2374 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2375 } 2376 } 2377 2378 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2379 if (idx) { 2380 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2381 } 2382 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2383 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2384 2385 for (i=0; i<A->rmap->n; i++) { 2386 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2387 va[i] = vb[i]; 2388 if (idx) idx[i] = a->garray[idxb[i]]; 2389 } 2390 } 2391 2392 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2393 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2394 ierr = PetscFree(idxb);CHKERRQ(ierr); 2395 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2396 PetscFunctionReturn(0); 2397 } 2398 2399 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2400 { 2401 
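  /* Compute the row extrema of the diagonal block (mat->A) and the off-diagonal block (mat->B)
     separately into work vectors, then merge them row by row: the entry with the smaller absolute
     value wins. Diagonal-block column indices are shifted by cstart to global indices; off-diagonal
     (compressed) indices are translated through garray. The same pattern, with the comparison
     reversed, is used by MatGetRowMax_MPIAIJ() below. */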
Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2402 PetscInt n = A->rmap->n; 2403 PetscInt cstart = A->cmap->rstart; 2404 PetscInt *cmap = mat->garray; 2405 PetscInt *diagIdx, *offdiagIdx; 2406 Vec diagV, offdiagV; 2407 PetscScalar *a, *diagA, *offdiagA; 2408 PetscInt r; 2409 PetscErrorCode ierr; 2410 2411 PetscFunctionBegin; 2412 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2413 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2414 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2415 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2416 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2417 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2418 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2419 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2420 for (r = 0; r < n; ++r) { 2421 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2422 a[r] = diagA[r]; 2423 idx[r] = cstart + diagIdx[r]; 2424 } else { 2425 a[r] = offdiagA[r]; 2426 idx[r] = cmap[offdiagIdx[r]]; 2427 } 2428 } 2429 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2430 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2431 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2432 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2433 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2434 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2439 { 2440 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2441 PetscInt n = A->rmap->n; 2442 PetscInt cstart = A->cmap->rstart; 2443 PetscInt *cmap = mat->garray; 2444 PetscInt *diagIdx, *offdiagIdx; 2445 Vec diagV, offdiagV; 2446 PetscScalar *a, *diagA, *offdiagA; 2447 PetscInt r; 2448 PetscErrorCode ierr; 2449 2450 PetscFunctionBegin; 2451 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2452 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2453 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2454 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2455 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2456 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2457 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2458 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2459 for (r = 0; r < n; ++r) { 2460 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2461 a[r] = diagA[r]; 2462 idx[r] = cstart + diagIdx[r]; 2463 } else { 2464 a[r] = offdiagA[r]; 2465 idx[r] = cmap[offdiagIdx[r]]; 2466 } 2467 } 2468 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2469 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2470 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2471 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2472 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2473 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2478 { 2479 PetscErrorCode ierr; 2480 Mat *dummy; 2481 2482 PetscFunctionBegin; 2483 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2484 *newmat = *dummy; 2485 ierr = PetscFree(dummy);CHKERRQ(ierr); 2486 PetscFunctionReturn(0); 2487 } 2488 2489 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2490 { 2491 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2492 PetscErrorCode ierr; 2493 2494 PetscFunctionBegin; 2495 ierr = 
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2496 A->factorerrortype = a->A->factorerrortype; 2497 PetscFunctionReturn(0); 2498 } 2499 2500 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2501 { 2502 PetscErrorCode ierr; 2503 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2504 2505 PetscFunctionBegin; 2506 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2507 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2508 if (x->assembled) { 2509 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2510 } else { 2511 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2512 } 2513 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2514 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2519 { 2520 PetscFunctionBegin; 2521 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2522 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2523 PetscFunctionReturn(0); 2524 } 2525 2526 /*@ 2527 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2528 2529 Collective on Mat 2530 2531 Input Parameters: 2532 + A - the matrix 2533 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2534 2535 Level: advanced 2536 2537 @*/ 2538 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2539 { 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2548 { 2549 PetscErrorCode ierr; 2550 PetscBool sc = PETSC_FALSE,flg; 2551 2552 PetscFunctionBegin; 2553 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2554 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2555 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2556 if (flg) { 2557 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2558 } 2559 ierr = PetscOptionsTail();CHKERRQ(ierr); 2560 PetscFunctionReturn(0); 2561 } 2562 2563 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2564 { 2565 PetscErrorCode ierr; 2566 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2567 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2568 2569 PetscFunctionBegin; 2570 if (!Y->preallocated) { 2571 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2572 } else if (!aij->nz) { 2573 PetscInt nonew = aij->nonew; 2574 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2575 aij->nonew = nonew; 2576 } 2577 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2582 { 2583 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2584 PetscErrorCode ierr; 2585 2586 PetscFunctionBegin; 2587 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2588 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2589 if (d) { 2590 PetscInt rstart; 
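    /* MatMissingDiagonal() on the sequential diagonal block returns a local row index;
       shift it by this process's first global row so the caller receives a global index. */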
2591 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2592 *d += rstart; 2593 2594 } 2595 PetscFunctionReturn(0); 2596 } 2597 2598 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2599 { 2600 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2601 PetscErrorCode ierr; 2602 2603 PetscFunctionBegin; 2604 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2605 PetscFunctionReturn(0); 2606 } 2607 2608 /* -------------------------------------------------------------------*/ 2609 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2610 MatGetRow_MPIAIJ, 2611 MatRestoreRow_MPIAIJ, 2612 MatMult_MPIAIJ, 2613 /* 4*/ MatMultAdd_MPIAIJ, 2614 MatMultTranspose_MPIAIJ, 2615 MatMultTransposeAdd_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 /*10*/ 0, 2620 0, 2621 0, 2622 MatSOR_MPIAIJ, 2623 MatTranspose_MPIAIJ, 2624 /*15*/ MatGetInfo_MPIAIJ, 2625 MatEqual_MPIAIJ, 2626 MatGetDiagonal_MPIAIJ, 2627 MatDiagonalScale_MPIAIJ, 2628 MatNorm_MPIAIJ, 2629 /*20*/ MatAssemblyBegin_MPIAIJ, 2630 MatAssemblyEnd_MPIAIJ, 2631 MatSetOption_MPIAIJ, 2632 MatZeroEntries_MPIAIJ, 2633 /*24*/ MatZeroRows_MPIAIJ, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*29*/ MatSetUp_MPIAIJ, 2639 0, 2640 0, 2641 MatGetDiagonalBlock_MPIAIJ, 2642 0, 2643 /*34*/ MatDuplicate_MPIAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*39*/ MatAXPY_MPIAIJ, 2649 MatCreateSubMatrices_MPIAIJ, 2650 MatIncreaseOverlap_MPIAIJ, 2651 MatGetValues_MPIAIJ, 2652 MatCopy_MPIAIJ, 2653 /*44*/ MatGetRowMax_MPIAIJ, 2654 MatScale_MPIAIJ, 2655 MatShift_MPIAIJ, 2656 MatDiagonalSet_MPIAIJ, 2657 MatZeroRowsColumns_MPIAIJ, 2658 /*49*/ MatSetRandom_MPIAIJ, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*54*/ MatFDColoringCreate_MPIXAIJ, 2664 0, 2665 MatSetUnfactored_MPIAIJ, 2666 MatPermute_MPIAIJ, 2667 0, 2668 /*59*/ MatCreateSubMatrix_MPIAIJ, 2669 MatDestroy_MPIAIJ, 2670 MatView_MPIAIJ, 2671 0, 2672 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2673 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2674 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2675 0, 2676 0, 2677 0, 2678 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2679 MatGetRowMinAbs_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*75*/ MatFDColoringApply_AIJ, 2685 MatSetFromOptions_MPIAIJ, 2686 0, 2687 0, 2688 MatFindZeroDiagonals_MPIAIJ, 2689 /*80*/ 0, 2690 0, 2691 0, 2692 /*83*/ MatLoad_MPIAIJ, 2693 MatIsSymmetric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2699 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2700 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2701 MatPtAP_MPIAIJ_MPIAIJ, 2702 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2703 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /*99*/ 0, 2709 0, 2710 0, 2711 MatConjugate_MPIAIJ, 2712 0, 2713 /*104*/MatSetValuesRow_MPIAIJ, 2714 MatRealPart_MPIAIJ, 2715 MatImaginaryPart_MPIAIJ, 2716 0, 2717 0, 2718 /*109*/0, 2719 0, 2720 MatGetRowMin_MPIAIJ, 2721 0, 2722 MatMissingDiagonal_MPIAIJ, 2723 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2724 0, 2725 MatGetGhosts_MPIAIJ, 2726 0, 2727 0, 2728 /*119*/0, 2729 0, 2730 0, 2731 0, 2732 MatGetMultiProcBlock_MPIAIJ, 2733 /*124*/MatFindNonzeroRows_MPIAIJ, 2734 MatGetColumnNorms_MPIAIJ, 2735 MatInvertBlockDiagonal_MPIAIJ, 2736 MatInvertVariableBlockDiagonal_MPIAIJ, 2737 MatCreateSubMatricesMPI_MPIAIJ, 2738 /*129*/0, 2739 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2740 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2741 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2742 0, 2743 /*134*/0, 2744 0, 2745 MatRARt_MPIAIJ_MPIAIJ, 2746 0, 2747 0, 2748 /*139*/MatSetBlockSizes_MPIAIJ, 2749 0, 
2750 0, 2751 MatFDColoringSetUp_MPIXAIJ, 2752 MatFindOffBlockDiagonalEntries_MPIAIJ, 2753 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2754 }; 2755 2756 /* ----------------------------------------------------------------------------------------*/ 2757 2758 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2759 { 2760 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2761 PetscErrorCode ierr; 2762 2763 PetscFunctionBegin; 2764 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2765 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2770 { 2771 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2772 PetscErrorCode ierr; 2773 2774 PetscFunctionBegin; 2775 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2776 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2781 { 2782 Mat_MPIAIJ *b; 2783 PetscErrorCode ierr; 2784 PetscMPIInt size; 2785 2786 PetscFunctionBegin; 2787 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2788 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2789 b = (Mat_MPIAIJ*)B->data; 2790 2791 #if defined(PETSC_USE_CTABLE) 2792 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2793 #else 2794 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2795 #endif 2796 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2797 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2798 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2799 2800 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2801 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2802 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2803 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2804 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2805 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2806 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2807 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2808 2809 if (!B->preallocated) { 2810 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2811 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2812 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2813 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2814 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2815 } 2816 2817 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2818 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2819 B->preallocated = PETSC_TRUE; 2820 B->was_assembled = PETSC_FALSE; 2821 B->assembled = PETSC_FALSE; 2822 PetscFunctionReturn(0); 2823 } 2824 2825 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2826 { 2827 Mat_MPIAIJ *b; 2828 PetscErrorCode ierr; 2829 2830 PetscFunctionBegin; 2831 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2832 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2833 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2834 b = (Mat_MPIAIJ*)B->data; 2835 2836 #if defined(PETSC_USE_CTABLE) 2837 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2838 #else 2839 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2840 #endif 2841 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2842 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2843 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2844 2845 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2846 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2847 B->preallocated = PETSC_TRUE; 2848 B->was_assembled = PETSC_FALSE; 2849 B->assembled = PETSC_FALSE; 2850 PetscFunctionReturn(0); 2851 } 2852 2853 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2854 { 2855 Mat mat; 2856 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2857 PetscErrorCode ierr; 2858 2859 PetscFunctionBegin; 2860 *newmat = 0; 2861 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2862 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2863 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2864 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2865 a = (Mat_MPIAIJ*)mat->data; 2866 2867 mat->factortype = matin->factortype; 2868 mat->assembled = PETSC_TRUE; 2869 mat->insertmode = NOT_SET_VALUES; 2870 mat->preallocated = PETSC_TRUE; 2871 2872 a->size = oldmat->size; 2873 a->rank = oldmat->rank; 2874 a->donotstash = oldmat->donotstash; 2875 a->roworiented = oldmat->roworiented; 2876 a->rowindices = 0; 2877 a->rowvalues = 0; 2878 a->getrowactive = PETSC_FALSE; 2879 2880 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2881 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2882 2883 if (oldmat->colmap) { 2884 #if defined(PETSC_USE_CTABLE) 2885 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2886 #else 2887 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2888 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2889 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2890 #endif 2891 } else a->colmap = 0; 2892 if (oldmat->garray) { 2893 PetscInt len; 2894 len = oldmat->B->cmap->n; 2895 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2896 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2897 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2898 } else a->garray = 0; 2899 2900 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2902 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2903 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2904 2905 if (oldmat->Mvctx_mpi1) { 2906 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2907 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2908 } 2909 2910 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2911 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2912 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2913 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2914 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2915 *newmat = mat; 2916 PetscFunctionReturn(0); 2917 } 2918 2919 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2920 { 2921 PetscBool isbinary, ishdf5; 2922 PetscErrorCode ierr; 2923 2924 PetscFunctionBegin; 2925 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2926 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2927 /* force binary viewer to load .info file if it has not yet done so */ 2928 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2929 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2930 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2931 if (isbinary) { 2932 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2933 } else if (ishdf5) { 2934 #if defined(PETSC_HAVE_HDF5) 2935 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2936 #else 2937 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2938 #endif 2939 } else { 2940 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2941 } 2942 PetscFunctionReturn(0); 2943 } 2944 2945 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2946 { 2947 PetscScalar *vals,*svals; 2948 MPI_Comm comm; 2949 PetscErrorCode ierr; 2950 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2951 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2952 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2953 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2954 PetscInt cend,cstart,n,*rowners; 2955 int fd; 2956 PetscInt bs = newMat->rmap->bs; 2957 2958 PetscFunctionBegin; 2959 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2960 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2961 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2962 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2963 if (!rank) { 2964 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2965 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2966 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 
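    /* PETSc binary matrix header layout (read just above): header[0] = MAT_FILE_CLASSID,
       header[1] = global number of rows M, header[2] = global number of columns N,
       header[3] = total number of nonzeros; a negative value in header[3] marks a special
       on-disk format that this AIJ loader does not handle. */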
2967 } 2968 2969 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2970 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2971 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2972 if (bs < 0) bs = 1; 2973 2974 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2975 M = header[1]; N = header[2]; 2976 2977 /* If global sizes are set, check if they are consistent with that given in the file */ 2978 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2979 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2980 2981 /* determine ownership of all (block) rows */ 2982 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2983 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2984 else m = newMat->rmap->n; /* Set by user */ 2985 2986 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2987 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2988 2989 /* First process needs enough room for process with most rows */ 2990 if (!rank) { 2991 mmax = rowners[1]; 2992 for (i=2; i<=size; i++) { 2993 mmax = PetscMax(mmax, rowners[i]); 2994 } 2995 } else mmax = -1; /* unused, but compilers complain */ 2996 2997 rowners[0] = 0; 2998 for (i=2; i<=size; i++) { 2999 rowners[i] += rowners[i-1]; 3000 } 3001 rstart = rowners[rank]; 3002 rend = rowners[rank+1]; 3003 3004 /* distribute row lengths to all processors */ 3005 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3006 if (!rank) { 3007 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3008 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3009 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3010 for (j=0; j<m; j++) { 3011 procsnz[0] += ourlens[j]; 3012 } 3013 for (i=1; i<size; i++) { 3014 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3015 /* calculate the number of nonzeros on each processor */ 3016 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3017 procsnz[i] += rowlengths[j]; 3018 } 3019 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3020 } 3021 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3022 } else { 3023 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3024 } 3025 3026 if (!rank) { 3027 /* determine max buffer needed and allocate it */ 3028 maxnz = 0; 3029 for (i=0; i<size; i++) { 3030 maxnz = PetscMax(maxnz,procsnz[i]); 3031 } 3032 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3033 3034 /* read in my part of the matrix column indices */ 3035 nz = procsnz[0]; 3036 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3037 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3038 3039 /* read in every one elses and ship off */ 3040 for (i=1; i<size; i++) { 3041 nz = procsnz[i]; 3042 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3043 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3044 } 3045 ierr = PetscFree(cols);CHKERRQ(ierr); 3046 } else { 3047 /* determine buffer space needed for message */ 3048 nz = 0; 3049 for (i=0; i<m; i++) { 3050 
nz += ourlens[i]; 3051 } 3052 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3053 3054 /* receive message of column indices*/ 3055 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3056 } 3057 3058 /* determine column ownership if matrix is not square */ 3059 if (N != M) { 3060 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3061 else n = newMat->cmap->n; 3062 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3063 cstart = cend - n; 3064 } else { 3065 cstart = rstart; 3066 cend = rend; 3067 n = cend - cstart; 3068 } 3069 3070 /* loop over local rows, determining number of off diagonal entries */ 3071 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3072 jj = 0; 3073 for (i=0; i<m; i++) { 3074 for (j=0; j<ourlens[i]; j++) { 3075 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3076 jj++; 3077 } 3078 } 3079 3080 for (i=0; i<m; i++) { 3081 ourlens[i] -= offlens[i]; 3082 } 3083 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3084 3085 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3086 3087 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3088 3089 for (i=0; i<m; i++) { 3090 ourlens[i] += offlens[i]; 3091 } 3092 3093 if (!rank) { 3094 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3095 3096 /* read in my part of the matrix numerical values */ 3097 nz = procsnz[0]; 3098 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3099 3100 /* insert into matrix */ 3101 jj = rstart; 3102 smycols = mycols; 3103 svals = vals; 3104 for (i=0; i<m; i++) { 3105 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3106 smycols += ourlens[i]; 3107 svals += ourlens[i]; 3108 jj++; 3109 } 3110 3111 /* read in other processors and ship out */ 3112 for (i=1; i<size; i++) { 3113 nz = procsnz[i]; 3114 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3115 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3116 } 3117 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3118 } else { 3119 /* receive numeric values */ 3120 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3121 3122 /* receive message of values*/ 3123 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3124 3125 /* insert into matrix */ 3126 jj = rstart; 3127 smycols = mycols; 3128 svals = vals; 3129 for (i=0; i<m; i++) { 3130 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3131 smycols += ourlens[i]; 3132 svals += ourlens[i]; 3133 jj++; 3134 } 3135 } 3136 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3137 ierr = PetscFree(vals);CHKERRQ(ierr); 3138 ierr = PetscFree(mycols);CHKERRQ(ierr); 3139 ierr = PetscFree(rowners);CHKERRQ(ierr); 3140 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3141 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3142 PetscFunctionReturn(0); 3143 } 3144 3145 /* Not scalable because of ISAllGather() unless getting all columns. 
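   ISAllGather() places a copy of the entire column index set on every process, so the memory and
   communication cost grows with the global size of iscol rather than with each process's local share.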
*/ 3146 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3147 { 3148 PetscErrorCode ierr; 3149 IS iscol_local; 3150 PetscBool isstride; 3151 PetscMPIInt lisstride=0,gisstride; 3152 3153 PetscFunctionBegin; 3154 /* check if we are grabbing all columns*/ 3155 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3156 3157 if (isstride) { 3158 PetscInt start,len,mstart,mlen; 3159 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3160 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3161 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3162 if (mstart == start && mlen-mstart == len) lisstride = 1; 3163 } 3164 3165 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3166 if (gisstride) { 3167 PetscInt N; 3168 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3169 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3170 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3171 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3172 } else { 3173 PetscInt cbs; 3174 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3175 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3176 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3177 } 3178 3179 *isseq = iscol_local; 3180 PetscFunctionReturn(0); 3181 } 3182 3183 /* 3184 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3185 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3186 3187 Input Parameters: 3188 mat - matrix 3189 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3190 i.e., mat->rstart <= isrow[i] < mat->rend 3191 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3192 i.e., mat->cstart <= iscol[i] < mat->cend 3193 Output Parameter: 3194 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3195 iscol_o - sequential column index set for retrieving mat->B 3196 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3197 */ 3198 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3199 { 3200 PetscErrorCode ierr; 3201 Vec x,cmap; 3202 const PetscInt *is_idx; 3203 PetscScalar *xarray,*cmaparray; 3204 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3205 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3206 Mat B=a->B; 3207 Vec lvec=a->lvec,lcmap; 3208 PetscInt i,cstart,cend,Bn=B->cmap->N; 3209 MPI_Comm comm; 3210 VecScatter Mvctx=a->Mvctx; 3211 3212 PetscFunctionBegin; 3213 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3214 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3215 3216 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3217 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3218 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3219 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3220 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3221 3222 /* Get start indices */ 3223 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3224 isstart -= ncols; 3225 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3226 3227 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3228 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3229 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3230 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3231 for (i=0; i<ncols; i++) { 3232 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3233 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3234 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3235 } 3236 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3237 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3238 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3239 3240 /* Get iscol_d */ 3241 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3242 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3243 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3244 3245 /* Get isrow_d */ 3246 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3247 rstart = mat->rmap->rstart; 3248 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3249 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3250 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3251 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3252 3253 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3254 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3255 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3256 3257 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3258 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3259 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3260 3261 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3262 3263 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3264 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3265 3266 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3267 /* off-process column indices */ 3268 count = 0; 3269 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3270 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3271 3272 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3273 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3274 for (i=0; i<Bn; i++) { 3275 if (PetscRealPart(xarray[i]) > -1.0) { 3276 idx[count] = i; /* local column index in off-diagonal part B */ 3277 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3278 count++; 3279 } 3280 } 3281 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3282 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3283 3284 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3285 /* cannot ensure iscol_o has same blocksize as iscol! 
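      (iscol_o keeps only those columns of B that are actually selected, so its length need not respect the block size of iscol)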
*/ 3286 3287 ierr = PetscFree(idx);CHKERRQ(ierr); 3288 *garray = cmap1; 3289 3290 ierr = VecDestroy(&x);CHKERRQ(ierr); 3291 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3292 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3293 PetscFunctionReturn(0); 3294 } 3295 3296 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3297 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3298 { 3299 PetscErrorCode ierr; 3300 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3301 Mat M = NULL; 3302 MPI_Comm comm; 3303 IS iscol_d,isrow_d,iscol_o; 3304 Mat Asub = NULL,Bsub = NULL; 3305 PetscInt n; 3306 3307 PetscFunctionBegin; 3308 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3309 3310 if (call == MAT_REUSE_MATRIX) { 3311 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3312 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3313 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3314 3315 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3316 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3317 3318 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3319 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3320 3321 /* Update diagonal and off-diagonal portions of submat */ 3322 asub = (Mat_MPIAIJ*)(*submat)->data; 3323 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3324 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3325 if (n) { 3326 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3327 } 3328 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3330 3331 } else { /* call == MAT_INITIAL_MATRIX) */ 3332 const PetscInt *garray; 3333 PetscInt BsubN; 3334 3335 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3336 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3337 3338 /* Create local submatrices Asub and Bsub */ 3339 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3340 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3341 3342 /* Create submatrix M */ 3343 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3344 3345 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3346 asub = (Mat_MPIAIJ*)M->data; 3347 3348 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3349 n = asub->B->cmap->N; 3350 if (BsubN > n) { 3351 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3352 const PetscInt *idx; 3353 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3354 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3355 3356 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3357 j = 0; 3358 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3359 for (i=0; i<n; i++) { 3360 if (j >= BsubN) break; 3361 while (subgarray[i] > garray[j]) j++; 3362 3363 if (subgarray[i] == garray[j]) { 3364 idx_new[i] = idx[j++]; 3365 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]); 3366 } 3367 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3368 3369 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3370 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3371 3372 } else if (BsubN < n) { 3373 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N); 3374 } 3375 3376 ierr = PetscFree(garray);CHKERRQ(ierr); 3377 *submat = M; 3378 3379 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3380 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3381 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3382 3383 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3384 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3385 3386 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3387 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3388 } 3389 PetscFunctionReturn(0); 3390 } 3391 3392 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3393 { 3394 PetscErrorCode ierr; 3395 IS iscol_local=NULL,isrow_d; 3396 PetscInt csize; 3397 PetscInt n,i,j,start,end; 3398 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3399 MPI_Comm comm; 3400 3401 PetscFunctionBegin; 3402 /* If isrow has same processor distribution as mat, 3403 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3404 if (call == MAT_REUSE_MATRIX) { 3405 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3406 if (isrow_d) { 3407 sameRowDist = PETSC_TRUE; 3408 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3409 } else { 3410 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3411 if (iscol_local) { 3412 sameRowDist = PETSC_TRUE; 3413 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3414 } 3415 } 3416 } else { 3417 /* Check if isrow has same processor distribution as mat */ 3418 sameDist[0]
= PETSC_FALSE; 3419 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3420 if (!n) { 3421 sameDist[0] = PETSC_TRUE; 3422 } else { 3423 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3424 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3425 if (i >= start && j < end) { 3426 sameDist[0] = PETSC_TRUE; 3427 } 3428 } 3429 3430 /* Check if iscol has same processor distribution as mat */ 3431 sameDist[1] = PETSC_FALSE; 3432 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3433 if (!n) { 3434 sameDist[1] = PETSC_TRUE; 3435 } else { 3436 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3437 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3438 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3439 } 3440 3441 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3442 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3443 sameRowDist = tsameDist[0]; 3444 } 3445 3446 if (sameRowDist) { 3447 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3448 /* isrow and iscol have same processor distribution as mat */ 3449 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3450 PetscFunctionReturn(0); 3451 } else { /* sameRowDist */ 3452 /* isrow has same processor distribution as mat */ 3453 if (call == MAT_INITIAL_MATRIX) { 3454 PetscBool sorted; 3455 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3456 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3457 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3458 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3459 3460 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3461 if (sorted) { 3462 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3463 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3464 PetscFunctionReturn(0); 3465 } 3466 } else { /* call == MAT_REUSE_MATRIX */ 3467 IS iscol_sub; 3468 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3469 if (iscol_sub) { 3470 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3471 PetscFunctionReturn(0); 3472 } 3473 } 3474 } 3475 } 3476 3477 /* General case: iscol -> iscol_local which has global size of iscol */ 3478 if (call == MAT_REUSE_MATRIX) { 3479 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3480 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3481 } else { 3482 if (!iscol_local) { 3483 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3484 } 3485 } 3486 3487 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3488 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3489 3490 if (call == MAT_INITIAL_MATRIX) { 3491 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3492 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3493 } 3494 PetscFunctionReturn(0); 3495 } 3496 3497 /*@C 3498 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3499 and "off-diagonal" part of the matrix in CSR format. 3500 3501 Collective 3502 3503 Input Parameters: 3504 + comm - MPI communicator 3505 . 
A - "diagonal" portion of matrix 3506 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3507 - garray - global index of B columns 3508 3509 Output Parameter: 3510 . mat - the matrix, with input A as its local diagonal matrix 3511 Level: advanced 3512 3513 Notes: 3514 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3515 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3516 3517 .seealso: MatCreateMPIAIJWithSplitArrays() 3518 @*/ 3519 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3520 { 3521 PetscErrorCode ierr; 3522 Mat_MPIAIJ *maij; 3523 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3524 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3525 PetscScalar *oa=b->a; 3526 Mat Bnew; 3527 PetscInt m,n,N; 3528 3529 PetscFunctionBegin; 3530 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3531 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3532 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3533 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3534 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3535 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3536 3537 /* Get global columns of mat */ 3538 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3539 3540 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3541 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3542 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3543 maij = (Mat_MPIAIJ*)(*mat)->data; 3544 3545 (*mat)->preallocated = PETSC_TRUE; 3546 3547 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3548 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3549 3550 /* Set A as diagonal portion of *mat */ 3551 maij->A = A; 3552 3553 nz = oi[m]; 3554 for (i=0; i<nz; i++) { 3555 col = oj[i]; 3556 oj[i] = garray[col]; 3557 } 3558 3559 /* Set Bnew as off-diagonal portion of *mat */ 3560 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3561 bnew = (Mat_SeqAIJ*)Bnew->data; 3562 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3563 maij->B = Bnew; 3564 3565 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3566 3567 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3568 b->free_a = PETSC_FALSE; 3569 b->free_ij = PETSC_FALSE; 3570 ierr = MatDestroy(&B);CHKERRQ(ierr); 3571 3572 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3573 bnew->free_a = PETSC_TRUE; 3574 bnew->free_ij = PETSC_TRUE; 3575 3576 /* condense columns of maij->B */ 3577 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3578 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3579 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3580 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3581 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3582 PetscFunctionReturn(0); 3583 } 3584 3585 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3586 
3587 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3588 { 3589 PetscErrorCode ierr; 3590 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3591 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3592 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3593 Mat M,Msub,B=a->B; 3594 MatScalar *aa; 3595 Mat_SeqAIJ *aij; 3596 PetscInt *garray = a->garray,*colsub,Ncols; 3597 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3598 IS iscol_sub,iscmap; 3599 const PetscInt *is_idx,*cmap; 3600 PetscBool allcolumns=PETSC_FALSE; 3601 MPI_Comm comm; 3602 3603 PetscFunctionBegin; 3604 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3605 3606 if (call == MAT_REUSE_MATRIX) { 3607 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3608 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3609 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3610 3611 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3612 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3613 3614 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3615 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3616 3617 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3618 3619 } else { /* call == MAT_INITIAL_MATRIX) */ 3620 PetscBool flg; 3621 3622 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3623 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3624 3625 /* (1) iscol -> nonscalable iscol_local */ 3626 /* Check for special case: each processor gets entire matrix columns */ 3627 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3628 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3629 if (allcolumns) { 3630 iscol_sub = iscol_local; 3631 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3632 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3633 3634 } else { 3635 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3636 PetscInt *idx,*cmap1,k; 3637 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3638 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3639 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3640 count = 0; 3641 k = 0; 3642 for (i=0; i<Ncols; i++) { 3643 j = is_idx[i]; 3644 if (j >= cstart && j < cend) { 3645 /* diagonal part of mat */ 3646 idx[count] = j; 3647 cmap1[count++] = i; /* column index in submat */ 3648 } else if (Bn) { 3649 /* off-diagonal part of mat */ 3650 if (j == garray[k]) { 3651 idx[count] = j; 3652 cmap1[count++] = i; /* column index in submat */ 3653 } else if (j > garray[k]) { 3654 while (j > garray[k] && k < Bn-1) k++; 3655 if (j == garray[k]) { 3656 idx[count] = j; 3657 cmap1[count++] = i; /* column index in submat */ 3658 } 3659 } 3660 } 3661 } 3662 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3663 3664 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3665 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3666 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3667 3668 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3669 } 3670 3671 /* (3) Create sequential Msub */ 3672 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3673 } 3674 3675 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3676 aij = (Mat_SeqAIJ*)(Msub)->data; 3677 ii = aij->i; 3678 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3679 3680 /* 3681 m - number of local rows 3682 Ncols - number of columns (same on all processors) 3683 rstart - first row in new global matrix generated 3684 */ 3685 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3686 3687 if (call == MAT_INITIAL_MATRIX) { 3688 /* (4) Create parallel newmat */ 3689 PetscMPIInt rank,size; 3690 PetscInt csize; 3691 3692 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3693 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3694 3695 /* 3696 Determine the number of non-zeros in the diagonal and off-diagonal 3697 portions of the matrix in order to do correct preallocation 3698 */ 3699 3700 /* first get start and end of "diagonal" columns */ 3701 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3702 if (csize == PETSC_DECIDE) { 3703 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3704 if (mglobal == Ncols) { /* square matrix */ 3705 nlocal = m; 3706 } else { 3707 nlocal = Ncols/size + ((Ncols % size) > rank); 3708 } 3709 } else { 3710 nlocal = csize; 3711 } 3712 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3713 rstart = rend - nlocal; 3714 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3715 3716 /* next, compute all the lengths */ 3717 jj = aij->j; 3718 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3719 olens = dlens + m; 3720 for (i=0; i<m; i++) { 3721 jend = ii[i+1] - ii[i]; 3722 olen = 0; 3723 dlen = 0; 3724 for (j=0; j<jend; j++) { 3725 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3726 else dlen++; 3727 jj++; 3728 } 3729 olens[i] = olen; 3730 dlens[i] = dlen; 3731 } 3732 3733 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3734 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3735 3736 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3737 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3738 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3739 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3740 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3741 ierr = PetscFree(dlens);CHKERRQ(ierr); 3742 3743 } else { /* call == MAT_REUSE_MATRIX */ 3744 M = *newmat; 3745 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3746 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3747 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3748 /* 3749 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3750 rather than the slower MatSetValues(). 3751 */ 3752 M->was_assembled = PETSC_TRUE; 3753 M->assembled = PETSC_FALSE; 3754 } 3755 3756 /* (5) Set values of Msub to *newmat */ 3757 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3758 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3759 3760 jj = aij->j; 3761 aa = aij->a; 3762 for (i=0; i<m; i++) { 3763 row = rstart + i; 3764 nz = ii[i+1] - ii[i]; 3765 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3766 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3767 jj += nz; aa += nz; 3768 } 3769 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3770 3771 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3772 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3773 3774 ierr = PetscFree(colsub);CHKERRQ(ierr); 3775 3776 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3777 if (call == MAT_INITIAL_MATRIX) { 3778 *newmat = M; 3779 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3780 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3781 3782 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3783 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3784 3785 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3786 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3787 3788 if (iscol_local) { 3789 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3790 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3791 } 3792 } 3793 PetscFunctionReturn(0); 3794 } 3795 3796 /* 3797 Not great since it makes two copies of the submatrix, first an SeqAIJ 3798 in local and then by concatenating the local matrices the end result. 3799 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3800 3801 Note: This requires a sequential iscol with all indices. 
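   The iscol passed in here is the sequential index set built by ISGetSeqIS_Private() (or retrieved from
   the "ISAllGather" composition on reuse), so every process supplies the full list of requested columns.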
3802 */ 3803 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3804 { 3805 PetscErrorCode ierr; 3806 PetscMPIInt rank,size; 3807 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3808 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3809 Mat M,Mreuse; 3810 MatScalar *aa,*vwork; 3811 MPI_Comm comm; 3812 Mat_SeqAIJ *aij; 3813 PetscBool colflag,allcolumns=PETSC_FALSE; 3814 3815 PetscFunctionBegin; 3816 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3817 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3818 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3819 3820 /* Check for special case: each processor gets entire matrix columns */ 3821 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3822 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3823 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3824 3825 if (call == MAT_REUSE_MATRIX) { 3826 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3827 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3828 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3829 } else { 3830 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3831 } 3832 3833 /* 3834 m - number of local rows 3835 n - number of columns (same on all processors) 3836 rstart - first row in new global matrix generated 3837 */ 3838 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3839 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3840 if (call == MAT_INITIAL_MATRIX) { 3841 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3842 ii = aij->i; 3843 jj = aij->j; 3844 3845 /* 3846 Determine the number of non-zeros in the diagonal and off-diagonal 3847 portions of the matrix in order to do correct preallocation 3848 */ 3849 3850 /* first get start and end of "diagonal" columns */ 3851 if (csize == PETSC_DECIDE) { 3852 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3853 if (mglobal == n) { /* square matrix */ 3854 nlocal = m; 3855 } else { 3856 nlocal = n/size + ((n % size) > rank); 3857 } 3858 } else { 3859 nlocal = csize; 3860 } 3861 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3862 rstart = rend - nlocal; 3863 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3864 3865 /* next, compute all the lengths */ 3866 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3867 olens = dlens + m; 3868 for (i=0; i<m; i++) { 3869 jend = ii[i+1] - ii[i]; 3870 olen = 0; 3871 dlen = 0; 3872 for (j=0; j<jend; j++) { 3873 if (*jj < rstart || *jj >= rend) olen++; 3874 else dlen++; 3875 jj++; 3876 } 3877 olens[i] = olen; 3878 dlens[i] = dlen; 3879 } 3880 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3881 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3882 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3883 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3884 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3885 ierr = PetscFree(dlens);CHKERRQ(ierr); 3886 } else { 3887 PetscInt ml,nl; 3888 3889 M = *newmat; 3890 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3891 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3892 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3893 /* 3894 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3895 rather than the slower MatSetValues(). 3896 */ 3897 M->was_assembled = PETSC_TRUE; 3898 M->assembled = PETSC_FALSE; 3899 } 3900 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3901 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3902 ii = aij->i; 3903 jj = aij->j; 3904 aa = aij->a; 3905 for (i=0; i<m; i++) { 3906 row = rstart + i; 3907 nz = ii[i+1] - ii[i]; 3908 cwork = jj; jj += nz; 3909 vwork = aa; aa += nz; 3910 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3911 } 3912 3913 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3914 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3915 *newmat = M; 3916 3917 /* save submatrix used in processor for next request */ 3918 if (call == MAT_INITIAL_MATRIX) { 3919 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3920 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3921 } 3922 PetscFunctionReturn(0); 3923 } 3924 3925 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3926 { 3927 PetscInt m,cstart, cend,j,nnz,i,d; 3928 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3929 const PetscInt *JJ; 3930 PetscErrorCode ierr; 3931 PetscBool nooffprocentries; 3932 3933 PetscFunctionBegin; 3934 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3935 3936 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3937 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3938 m = B->rmap->n; 3939 cstart = B->cmap->rstart; 3940 cend = B->cmap->rend; 3941 rstart = B->rmap->rstart; 3942 3943 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3944 3945 #if defined(PETSC_USE_DEBUG) 3946 for (i=0; i<m; i++) { 3947 nnz = Ii[i+1]- Ii[i]; 3948 JJ = J + Ii[i]; 3949 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3950 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3951 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3952 } 3953 #endif 3954 3955 for (i=0; i<m; i++) { 3956 nnz = Ii[i+1]- Ii[i]; 3957 JJ = J + Ii[i]; 3958 nnz_max = PetscMax(nnz_max,nnz); 3959 d = 0; 3960 for (j=0; j<nnz; j++) { 3961 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3962 } 3963 d_nnz[i] = d; 3964 o_nnz[i] = nnz - d; 3965 } 3966 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3967 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3968 3969 for (i=0; i<m; i++) { 3970 ii = i + rstart; 3971 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3972 } 3973 nooffprocentries = B->nooffprocentries; 3974 B->nooffprocentries = PETSC_TRUE; 3975 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3976 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3977 B->nooffprocentries = nooffprocentries; 3978 3979 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3980 PetscFunctionReturn(0); 3981 } 3982 3983 /*@ 3984 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3985 (the default parallel PETSc format). 
3986 3987 Collective 3988 3989 Input Parameters: 3990 + B - the matrix 3991 . i - the indices into j for the start of each local row (starts with zero) 3992 . j - the column indices for each local row (starts with zero) 3993 - v - optional values in the matrix 3994 3995 Level: developer 3996 3997 Notes: 3998 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3999 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4000 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4001 4002 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4003 4004 The format which is used for the sparse matrix input, is equivalent to a 4005 row-major ordering.. i.e for the following matrix, the input data expected is 4006 as shown 4007 4008 $ 1 0 0 4009 $ 2 0 3 P0 4010 $ ------- 4011 $ 4 5 6 P1 4012 $ 4013 $ Process0 [P0]: rows_owned=[0,1] 4014 $ i = {0,1,3} [size = nrow+1 = 2+1] 4015 $ j = {0,0,2} [size = 3] 4016 $ v = {1,2,3} [size = 3] 4017 $ 4018 $ Process1 [P1]: rows_owned=[2] 4019 $ i = {0,3} [size = nrow+1 = 1+1] 4020 $ j = {0,1,2} [size = 3] 4021 $ v = {4,5,6} [size = 3] 4022 4023 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4024 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4025 @*/ 4026 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4027 { 4028 PetscErrorCode ierr; 4029 4030 PetscFunctionBegin; 4031 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4032 PetscFunctionReturn(0); 4033 } 4034 4035 /*@C 4036 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4037 (the default parallel PETSc format). For good matrix assembly performance 4038 the user should preallocate the matrix storage by setting the parameters 4039 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4040 performance can be increased by more than a factor of 50. 4041 4042 Collective 4043 4044 Input Parameters: 4045 + B - the matrix 4046 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4047 (same value is used for all local rows) 4048 . d_nnz - array containing the number of nonzeros in the various rows of the 4049 DIAGONAL portion of the local submatrix (possibly different for each row) 4050 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4051 The size of this array is equal to the number of local rows, i.e 'm'. 4052 For matrices that will be factored, you must leave room for (and set) 4053 the diagonal entry even if it is zero. 4054 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4055 submatrix (same value is used for all local rows). 4056 - o_nnz - array containing the number of nonzeros in the various rows of the 4057 OFF-DIAGONAL portion of the local submatrix (possibly different for 4058 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4059 structure. The size of this array is equal to the number 4060 of local rows, i.e 'm'. 
4061 4062 If the *_nnz parameter is given then the *_nz parameter is ignored. 4063 4064 The AIJ format (also called the Yale sparse matrix format or 4065 compressed row storage (CSR)) is fully compatible with standard Fortran 77 4066 storage. The stored row and column indices begin with zero. 4067 See Users-Manual: ch_mat for details. 4068 4069 The parallel matrix is partitioned such that the first m0 rows belong to 4070 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4071 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 4072 4073 The DIAGONAL portion of the local submatrix of a processor can be defined 4074 as the submatrix which is obtained by extracting the part corresponding to 4075 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4076 first row that belongs to the processor, r2 is the last row belonging to 4077 this processor, and c1-c2 is the range of indices of the local part of a 4078 vector suitable for applying the matrix to. This is an mxn matrix. In the 4079 common case of a square matrix, the row and column ranges are the same and 4080 the DIAGONAL part is also square. The remaining portion of the local 4081 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4082 4083 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4084 4085 You can call MatGetInfo() to get information on how effective the preallocation was; 4086 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 4087 You can also run with the option -info and look for messages with the string 4088 malloc in them to see if additional memory allocation was needed. 4089 4090 Example usage: 4091 4092 Consider the following 8x8 matrix with 34 non-zero values, that is 4093 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4094 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4095 as follows: 4096 4097 .vb 4098 1 2 0 | 0 3 0 | 0 4 4099 Proc0 0 5 6 | 7 0 0 | 8 0 4100 9 0 10 | 11 0 0 | 12 0 4101 ------------------------------------- 4102 13 0 14 | 15 16 17 | 0 0 4103 Proc1 0 18 0 | 19 20 21 | 0 0 4104 0 0 0 | 22 23 0 | 24 0 4105 ------------------------------------- 4106 Proc2 25 26 27 | 0 0 28 | 29 0 4107 30 0 0 | 31 32 33 | 0 34 4108 .ve 4109 4110 This can be represented as a collection of submatrices as: 4111 4112 .vb 4113 A B C 4114 D E F 4115 G H I 4116 .ve 4117 4118 Where the submatrices A,B,C are owned by proc0, D,E,F are 4119 owned by proc1, G,H,I are owned by proc2. 4120 4121 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4122 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4123 The 'M','N' parameters are 8,8, and have the same values on all procs. 4124 4125 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4126 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4127 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4128 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4129 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4130 matrix and [DF] as another SeqAIJ matrix. 4131 4132 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4133 allocated for every row of the local DIAGONAL submatrix, and o_nz 4134 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4135 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4136 row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4137 In this case, the values of d_nz,o_nz are: 4138 .vb 4139 proc0 : d_nz = 2, o_nz = 2 4140 proc1 : d_nz = 3, o_nz = 2 4141 proc2 : d_nz = 1, o_nz = 4 4142 .ve 4143 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4144 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4145 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4146 34 values. 4147 4148 When the d_nnz, o_nnz parameters are specified, the storage is specified 4149 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4150 In the above case the values for d_nnz,o_nnz are: 4151 .vb 4152 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4153 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4154 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4155 .ve 4156 Here the space allocated is the sum of all the above values, i.e., 34, and 4157 hence the preallocation is perfect. 4158 4159 Level: intermediate 4160 4161 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4162 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4163 @*/ 4164 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4165 { 4166 PetscErrorCode ierr; 4167 4168 PetscFunctionBegin; 4169 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4170 PetscValidType(B,1); 4171 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4172 PetscFunctionReturn(0); 4173 } 4174 4175 /*@ 4176 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4177 CSR format. 4178 4179 Collective 4180 4181 Input Parameters: 4182 + comm - MPI communicator 4183 . m - number of local rows (Cannot be PETSC_DECIDE) 4184 . n - This value should be the same as the local size used in creating the 4185 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4186 calculated if N is given) For square matrices n is almost always m. 4187 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4188 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4189 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4190 . j - column indices 4191 - a - matrix values 4192 4193 Output Parameter: 4194 . mat - the matrix 4195 4196 Level: intermediate 4197 4198 Notes: 4199 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4200 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4201 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4202 4203 The i and j indices are 0 based, and the i indices are indices corresponding to the local j array. 4204 4205 The format used for the sparse matrix input is equivalent to a 4206 row-major ordering,
i.e., for the following matrix, the input data expected is 4207 as shown below. 4208 4209 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays(). 4210 4211 $ 1 0 0 4212 $ 2 0 3 P0 4213 $ ------- 4214 $ 4 5 6 P1 4215 $ 4216 $ Process0 [P0]: rows_owned=[0,1] 4217 $ i = {0,1,3} [size = nrow+1 = 2+1] 4218 $ j = {0,0,2} [size = 3] 4219 $ v = {1,2,3} [size = 3] 4220 $ 4221 $ Process1 [P1]: rows_owned=[2] 4222 $ i = {0,3} [size = nrow+1 = 1+1] 4223 $ j = {0,1,2} [size = 3] 4224 $ v = {4,5,6} [size = 3] 4225 4226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4227 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4228 @*/ 4229 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4230 { 4231 PetscErrorCode ierr; 4232 4233 PetscFunctionBegin; 4234 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4235 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4236 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4237 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4238 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4239 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4240 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4241 PetscFunctionReturn(0); 4242 } 4243 4244 /*@ 4245 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4246 CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix 4247 4248 Collective 4249 4250 Input Parameters: 4251 + mat - the matrix 4252 . m - number of local rows (Cannot be PETSC_DECIDE) 4253 . n - This value should be the same as the local size used in creating the 4254 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4255 calculated if N is given) For square matrices n is almost always m. 4256 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4257 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4258 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4259 .
J - column indices 4260 - v - matrix values 4261 4262 Level: intermediate 4263 4264 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4265 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4266 @*/ 4267 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4268 { 4269 PetscErrorCode ierr; 4270 PetscInt cstart,nnz,i,j; 4271 PetscInt *ld; 4272 PetscBool nooffprocentries; 4273 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4274 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4275 PetscScalar *ad = Ad->a, *ao = Ao->a; 4276 const PetscInt *Adi = Ad->i; 4277 PetscInt ldi,Iii,md; 4278 4279 PetscFunctionBegin; 4280 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4281 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4282 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4283 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4284 4285 cstart = mat->cmap->rstart; 4286 if (!Aij->ld) { 4287 /* count, for each local row, how many leading entries have a column before the diagonal block (column < cstart) */ 4288 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4289 Aij->ld = ld; 4290 for (i=0; i<m; i++) { 4291 nnz = Ii[i+1]- Ii[i]; 4292 j = 0; 4293 while (j < nnz && J[j] < cstart) {j++;} /* check the bound on j before reading J[j] */ 4294 J += nnz; 4295 ld[i] = j; 4296 } 4297 } else { 4298 ld = Aij->ld; 4299 } 4300 4301 for (i=0; i<m; i++) { 4302 nnz = Ii[i+1]- Ii[i]; 4303 Iii = Ii[i]; 4304 ldi = ld[i]; 4305 md = Adi[i+1]-Adi[i]; 4306 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4307 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4308 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4309 ad += md; 4310 ao += nnz - md; 4311 } 4312 nooffprocentries = mat->nooffprocentries; 4313 mat->nooffprocentries = PETSC_TRUE; 4314 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4315 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4316 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4317 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4318 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4319 mat->nooffprocentries = nooffprocentries; 4320 PetscFunctionReturn(0); 4321 } 4322 4323 /*@C 4324 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4325 (the default parallel PETSc format). For good matrix assembly performance 4326 the user should preallocate the matrix storage by setting the parameters 4327 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4328 performance can be increased by more than a factor of 50. 4329 4330 Collective 4331 4332 Input Parameters: 4333 + comm - MPI communicator 4334 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4335 This value should be the same as the local size used in creating the 4336 y vector for the matrix-vector product y = Ax. 4337 . n - This value should be the same as the local size used in creating the 4338 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4339 calculated if N is given) For square matrices n is almost always m. 4340 .
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4341 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4342 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4343 (same value is used for all local rows) 4344 . d_nnz - array containing the number of nonzeros in the various rows of the 4345 DIAGONAL portion of the local submatrix (possibly different for each row) 4346 or NULL, if d_nz is used to specify the nonzero structure. 4347 The size of this array is equal to the number of local rows, i.e 'm'. 4348 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4349 submatrix (same value is used for all local rows). 4350 - o_nnz - array containing the number of nonzeros in the various rows of the 4351 OFF-DIAGONAL portion of the local submatrix (possibly different for 4352 each row) or NULL, if o_nz is used to specify the nonzero 4353 structure. The size of this array is equal to the number 4354 of local rows, i.e 'm'. 4355 4356 Output Parameter: 4357 . A - the matrix 4358 4359 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4360 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4361 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4362 4363 Notes: 4364 If the *_nnz parameter is given then the *_nz parameter is ignored 4365 4366 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4367 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4368 storage requirements for this matrix. 4369 4370 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4371 processor than it must be used on all processors that share the object for 4372 that argument. 4373 4374 The user MUST specify either the local or global matrix dimensions 4375 (possibly both). 4376 4377 The parallel matrix is partitioned across processors such that the 4378 first m0 rows belong to process 0, the next m1 rows belong to 4379 process 1, the next m2 rows belong to process 2 etc.. where 4380 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4381 values corresponding to [m x N] submatrix. 4382 4383 The columns are logically partitioned with the n0 columns belonging 4384 to 0th partition, the next n1 columns belonging to the next 4385 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4386 4387 The DIAGONAL portion of the local submatrix on any given processor 4388 is the submatrix corresponding to the rows and columns m,n 4389 corresponding to the given processor. i.e diagonal matrix on 4390 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4391 etc. The remaining portion of the local submatrix [m x (N-n)] 4392 constitute the OFF-DIAGONAL portion. The example below better 4393 illustrates this concept. 4394 4395 For a square global matrix we define each processor's diagonal portion 4396 to be its local rows and the corresponding columns (a square submatrix); 4397 each processor's off-diagonal portion encompasses the remainder of the 4398 local matrix (a rectangular submatrix). 4399 4400 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4401 4402 When calling this routine with a single process communicator, a matrix of 4403 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4404 type of communicator, use the construction mechanism 4405 .vb 4406 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4407 .ve 4408 4409 $ MatCreate(...,&A); 4410 $ MatSetType(A,MATMPIAIJ); 4411 $ MatSetSizes(A, m,n,M,N); 4412 $ MatMPIAIJSetPreallocation(A,...); 4413 4414 By default, this format uses inodes (identical nodes) when possible. 4415 We search for consecutive rows with the same nonzero structure, thereby 4416 reusing matrix information to achieve increased efficiency. 4417 4418 Options Database Keys: 4419 + -mat_no_inode - Do not use inodes 4420 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4421 4422 4423 4424 Example usage: 4425 4426 Consider the following 8x8 matrix with 34 non-zero values, that is 4427 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4428 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4429 as follows 4430 4431 .vb 4432 1 2 0 | 0 3 0 | 0 4 4433 Proc0 0 5 6 | 7 0 0 | 8 0 4434 9 0 10 | 11 0 0 | 12 0 4435 ------------------------------------- 4436 13 0 14 | 15 16 17 | 0 0 4437 Proc1 0 18 0 | 19 20 21 | 0 0 4438 0 0 0 | 22 23 0 | 24 0 4439 ------------------------------------- 4440 Proc2 25 26 27 | 0 0 28 | 29 0 4441 30 0 0 | 31 32 33 | 0 34 4442 .ve 4443 4444 This can be represented as a collection of submatrices as 4445 4446 .vb 4447 A B C 4448 D E F 4449 G H I 4450 .ve 4451 4452 Where the submatrices A,B,C are owned by proc0, D,E,F are 4453 owned by proc1, G,H,I are owned by proc2. 4454 4455 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4456 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4457 The 'M','N' parameters are 8,8, and have the same values on all procs. 4458 4459 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4460 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4461 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4462 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4463 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4464 matrix, ans [DF] as another SeqAIJ matrix. 4465 4466 When d_nz, o_nz parameters are specified, d_nz storage elements are 4467 allocated for every row of the local diagonal submatrix, and o_nz 4468 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4469 One way to choose d_nz and o_nz is to use the max nonzerors per local 4470 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4471 In this case, the values of d_nz,o_nz are 4472 .vb 4473 proc0 : dnz = 2, o_nz = 2 4474 proc1 : dnz = 3, o_nz = 2 4475 proc2 : dnz = 1, o_nz = 4 4476 .ve 4477 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4478 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4479 for proc3. i.e we are using 12+15+10=37 storage locations to store 4480 34 values. 4481 4482 When d_nnz, o_nnz parameters are specified, the storage is specified 4483 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4484 In the above case the values for d_nnz,o_nnz are 4485 .vb 4486 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4487 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4488 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4489 .ve 4490 Here the space allocated is sum of all the above values i.e 34, and 4491 hence pre-allocation is perfect. 
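   As a schematic sketch only, the 8x8 example above could be assembled with the per-row
   preallocation arrays just listed; here comm, mlocal, nlocal, rstart, rend, d_nnz, o_nnz and the
   way ncols, cols[] and vals[] are filled for each locally owned row are placeholders for what each
   process provides, and error checking is omitted:
.vb
   Mat         A;
   PetscInt    row,ncols,cols[8];
   PetscScalar vals[8];

   MatCreateAIJ(comm,mlocal,nlocal,8,8,0,d_nnz,0,o_nnz,&A);
   for (row=rstart; row<rend; row++) {
     /* fill ncols, cols[] and vals[] with the nonzeros of this global row */
     MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);
   }
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve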
4492 4493 Level: intermediate 4494 4495 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4496 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4497 @*/ 4498 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4499 { 4500 PetscErrorCode ierr; 4501 PetscMPIInt size; 4502 4503 PetscFunctionBegin; 4504 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4505 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4506 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4507 if (size > 1) { 4508 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4509 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4510 } else { 4511 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4512 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4513 } 4514 PetscFunctionReturn(0); 4515 } 4516 4517 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4518 { 4519 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4520 PetscBool flg; 4521 PetscErrorCode ierr; 4522 4523 PetscFunctionBegin; 4524 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4525 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4526 if (Ad) *Ad = a->A; 4527 if (Ao) *Ao = a->B; 4528 if (colmap) *colmap = a->garray; 4529 PetscFunctionReturn(0); 4530 } 4531 4532 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4533 { 4534 PetscErrorCode ierr; 4535 PetscInt m,N,i,rstart,nnz,Ii; 4536 PetscInt *indx; 4537 PetscScalar *values; 4538 4539 PetscFunctionBegin; 4540 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4541 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4542 PetscInt *dnz,*onz,sum,bs,cbs; 4543 4544 if (n == PETSC_DECIDE) { 4545 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4546 } 4547 /* Check sum(n) = N */ 4548 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4549 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4550 4551 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4552 rstart -= m; 4553 4554 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4555 for (i=0; i<m; i++) { 4556 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4557 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4558 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4559 } 4560 4561 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4562 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4563 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4564 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4565 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4566 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4567 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4568 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4569 } 4570 4571 /* numeric phase */ 4572 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4573 for (i=0; i<m; i++) { 4574 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4575 Ii = i + rstart; 4576 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4577 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4578 } 4579 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4580 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4581 PetscFunctionReturn(0); 4582 } 4583 4584 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4585 { 4586 PetscErrorCode ierr; 4587 PetscMPIInt rank; 4588 PetscInt m,N,i,rstart,nnz; 4589 size_t len; 4590 const PetscInt *indx; 4591 PetscViewer out; 4592 char *name; 4593 Mat B; 4594 const PetscScalar *values; 4595 4596 PetscFunctionBegin; 4597 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4598 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4599 /* Should this be the type of the diagonal block of A? */ 4600 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4601 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4602 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4603 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4604 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4605 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4606 for (i=0; i<m; i++) { 4607 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4608 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4609 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4610 } 4611 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4612 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4613 4614 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4615 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4616 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4617 sprintf(name,"%s.%d",outfile,rank); 4618 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4619 ierr = PetscFree(name);CHKERRQ(ierr); 4620 ierr = MatView(B,out);CHKERRQ(ierr); 4621 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4622 ierr = MatDestroy(&B);CHKERRQ(ierr); 4623 PetscFunctionReturn(0); 4624 } 4625 4626 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4627 { 4628 PetscErrorCode ierr; 4629 Mat_Merge_SeqsToMPI *merge; 4630 PetscContainer container; 4631 4632 PetscFunctionBegin; 4633 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4634 if (container) { 4635 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4636 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4637 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4638 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4639 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4640 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4641 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4642 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4643 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4644 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4645 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4646 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4647 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4648 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4649 ierr = PetscFree(merge);CHKERRQ(ierr); 4650 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4651 } 4652 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4653 PetscFunctionReturn(0); 4654 } 4655 4656 #include <../src/mat/utils/freespace.h> 4657 #include <petscbt.h> 4658 4659 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4660 { 4661 PetscErrorCode ierr; 4662 MPI_Comm comm; 4663 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4664 PetscMPIInt 
size,rank,taga,*len_s; 4665 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4666 PetscInt proc,m; 4667 PetscInt **buf_ri,**buf_rj; 4668 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4669 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4670 MPI_Request *s_waits,*r_waits; 4671 MPI_Status *status; 4672 MatScalar *aa=a->a; 4673 MatScalar **abuf_r,*ba_i; 4674 Mat_Merge_SeqsToMPI *merge; 4675 PetscContainer container; 4676 4677 PetscFunctionBegin; 4678 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4679 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4680 4681 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4682 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4683 4684 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4685 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4686 4687 bi = merge->bi; 4688 bj = merge->bj; 4689 buf_ri = merge->buf_ri; 4690 buf_rj = merge->buf_rj; 4691 4692 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4693 owners = merge->rowmap->range; 4694 len_s = merge->len_s; 4695 4696 /* send and recv matrix values */ 4697 /*-----------------------------*/ 4698 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4699 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4700 4701 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4702 for (proc=0,k=0; proc<size; proc++) { 4703 if (!len_s[proc]) continue; 4704 i = owners[proc]; 4705 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4706 k++; 4707 } 4708 4709 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4710 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4711 ierr = PetscFree(status);CHKERRQ(ierr); 4712 4713 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4714 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4715 4716 /* insert mat values of mpimat */ 4717 /*----------------------------*/ 4718 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4719 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4720 4721 for (k=0; k<merge->nrecv; k++) { 4722 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4723 nrows = *(buf_ri_k[k]); 4724 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4725 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4726 } 4727 4728 /* set values of ba */ 4729 m = merge->rowmap->n; 4730 for (i=0; i<m; i++) { 4731 arow = owners[rank] + i; 4732 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4733 bnzi = bi[i+1] - bi[i]; 4734 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4735 4736 /* add local non-zero vals of this proc's seqmat into ba */ 4737 anzi = ai[arow+1] - ai[arow]; 4738 aj = a->j + ai[arow]; 4739 aa = a->a + ai[arow]; 4740 nextaj = 0; 4741 for (j=0; nextaj<anzi; j++) { 4742 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4743 ba_i[j] += aa[nextaj++]; 4744 } 4745 } 4746 4747 /* add received vals into ba */ 4748 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4749 /* i-th row */ 4750 if (i == *nextrow[k]) { 4751 anzi = *(nextai[k]+1) - *nextai[k]; 4752 aj = buf_rj[k] + *(nextai[k]); 4753 aa = abuf_r[k] + *(nextai[k]); 4754 nextaj = 0; 4755 for (j=0; nextaj<anzi; j++) { 4756 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4757 
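          /* aj and aa point into the k-th received buffers (buf_rj[k], abuf_r[k]); both bj_i and aj are sorted, so nextaj advances only on a column match */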
ba_i[j] += aa[nextaj++]; 4758 } 4759 } 4760 nextrow[k]++; nextai[k]++; 4761 } 4762 } 4763 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4764 } 4765 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4766 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4767 4768 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4769 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4770 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4771 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4772 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4773 PetscFunctionReturn(0); 4774 } 4775 4776 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4777 { 4778 PetscErrorCode ierr; 4779 Mat B_mpi; 4780 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4781 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4782 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4783 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4784 PetscInt len,proc,*dnz,*onz,bs,cbs; 4785 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4786 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4787 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4788 MPI_Status *status; 4789 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4790 PetscBT lnkbt; 4791 Mat_Merge_SeqsToMPI *merge; 4792 PetscContainer container; 4793 4794 PetscFunctionBegin; 4795 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4796 4797 /* make sure it is a PETSc comm */ 4798 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4799 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4800 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4801 4802 ierr = PetscNew(&merge);CHKERRQ(ierr); 4803 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4804 4805 /* determine row ownership */ 4806 /*---------------------------------------------------------*/ 4807 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4808 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4809 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4810 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4811 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4812 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4813 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4814 4815 m = merge->rowmap->n; 4816 owners = merge->rowmap->range; 4817 4818 /* determine the number of messages to send, their lengths */ 4819 /*---------------------------------------------------------*/ 4820 len_s = merge->len_s; 4821 4822 len = 0; /* length of buf_si[] */ 4823 merge->nsend = 0; 4824 for (proc=0; proc<size; proc++) { 4825 len_si[proc] = 0; 4826 if (proc == rank) { 4827 len_s[proc] = 0; 4828 } else { 4829 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4830 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4831 } 4832 if (len_s[proc]) { 4833 merge->nsend++; 4834 nrows = 0; 4835 for (i=owners[proc]; i<owners[proc+1]; i++) { 4836 if (ai[i+1] > ai[i]) nrows++; 4837 } 4838 len_si[proc] = 2*(nrows+1); 4839 len += len_si[proc]; 4840 } 4841 } 4842 4843 /* determine the number and length of messages to receive for ij-structure */ 4844 /*-------------------------------------------------------------------------*/ 4845 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4846 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4847 4848 /* post the Irecv of j-structure */ 4849 /*-------------------------------*/ 4850 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4851 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4852 4853 /* post the Isend of j-structure */ 4854 /*--------------------------------*/ 4855 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4856 4857 for (proc=0, k=0; proc<size; proc++) { 4858 if (!len_s[proc]) continue; 4859 i = owners[proc]; 4860 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4861 k++; 4862 } 4863 4864 /* receives and sends of j-structure are complete */ 4865 /*------------------------------------------------*/ 4866 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4867 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4868 4869 /* send and recv i-structure */ 4870 /*---------------------------*/ 4871 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4872 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4873 4874 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4875 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4876 for (proc=0,k=0; proc<size; proc++) { 4877 if (!len_s[proc]) continue; 4878 /* form outgoing message for i-structure: 4879 buf_si[0]: nrows to be sent 4880 [1:nrows]: row index (global) 4881 [nrows+1:2*nrows+1]: i-structure index 4882 */ 4883 /*-------------------------------------------*/ 4884 nrows = len_si[proc]/2 - 1; 4885 buf_si_i = buf_si + nrows+1; 4886 buf_si[0] = nrows; 4887 buf_si_i[0] = 0; 4888 nrows = 0; 4889 for (i=owners[proc]; i<owners[proc+1]; i++) { 4890 anzi = ai[i+1] - ai[i]; 4891 if (anzi) { 4892 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4893 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4894 nrows++; 4895 } 4896 } 4897 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4898 k++; 4899 buf_si += len_si[proc]; 4900 } 4901 4902 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4903 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4904 4905 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4906 for (i=0; i<merge->nrecv; i++) { 4907 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4908 } 4909 4910 ierr = PetscFree(len_si);CHKERRQ(ierr); 4911 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4912 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4913 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4914 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4915 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4916 ierr = PetscFree(status);CHKERRQ(ierr); 4917 4918 /* compute a local seq matrix in each processor */ 4919 /*----------------------------------------------*/ 4920 /* allocate bi array and free space for accumulating nonzero column info */ 4921 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4922 bi[0] = 0; 4923 4924 /* create and initialize a linked list */ 4925 nlnk = N+1; 4926 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4927 4928 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4929 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4930 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4931 4932 current_space = free_space; 4933 4934 /* determine symbolic info for each local row */ 4935 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4936 4937 for (k=0; k<merge->nrecv; k++) { 4938 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4939 nrows = *buf_ri_k[k]; 4940 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4941 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4942 } 4943 4944 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4945 len = 0; 4946 for (i=0; i<m; i++) { 4947 bnzi = 0; 4948 /* add local non-zero cols of this proc's seqmat into lnk */ 4949 arow = owners[rank] + i; 4950 anzi = ai[arow+1] - ai[arow]; 4951 aj = a->j + ai[arow]; 4952 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4953 bnzi += nlnk; 4954 /* add received col data into lnk */ 4955 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4956 if (i == *nextrow[k]) { /* i-th row */ 4957 anzi = *(nextai[k]+1) - *nextai[k]; 4958 aj = buf_rj[k] + *nextai[k]; 4959 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4960 bnzi += nlnk; 4961 nextrow[k]++; nextai[k]++; 4962 } 4963 } 4964 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4965 4966 /* if free space is not available, make more free space */ 4967 if (current_space->local_remaining<bnzi) { 4968 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4969 nspacedouble++; 4970 } 4971 /* copy data into free space, then initialize lnk */ 4972 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4973 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4974 4975 current_space->array += bnzi; 4976 current_space->local_used += bnzi; 4977 current_space->local_remaining -= bnzi; 4978 4979 bi[i+1] = bi[i] + bnzi; 4980 } 4981 4982 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4983 4984 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4985 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4986 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4987 4988 /* create symbolic parallel matrix B_mpi */ 4989 /*---------------------------------------*/ 4990 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4991 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4992 if (n==PETSC_DECIDE) { 4993 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4994 } else { 4995 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4996 } 4997 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4998 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4999 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5000 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5001 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5002 5003 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5004 B_mpi->assembled = PETSC_FALSE; 5005 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5006 merge->bi = bi; 5007 merge->bj = bj; 5008 merge->buf_ri = buf_ri; 5009 merge->buf_rj = buf_rj; 5010 merge->coi = NULL; 5011 merge->coj = NULL; 5012 merge->owners_co = NULL; 5013 5014 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5015 5016 /* attach the 
supporting struct to B_mpi for reuse */ 5017 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5018 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5019 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5020 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5021 *mpimat = B_mpi; 5022 5023 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5024 PetscFunctionReturn(0); 5025 } 5026 5027 /*@C 5028 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5029 matrices from each processor 5030 5031 Collective 5032 5033 Input Parameters: 5034 + comm - the communicators the parallel matrix will live on 5035 . seqmat - the input sequential matrices 5036 . m - number of local rows (or PETSC_DECIDE) 5037 . n - number of local columns (or PETSC_DECIDE) 5038 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5039 5040 Output Parameter: 5041 . mpimat - the parallel matrix generated 5042 5043 Level: advanced 5044 5045 Notes: 5046 The dimensions of the sequential matrix in each processor MUST be the same. 5047 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5048 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5049 @*/ 5050 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5051 { 5052 PetscErrorCode ierr; 5053 PetscMPIInt size; 5054 5055 PetscFunctionBegin; 5056 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5057 if (size == 1) { 5058 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5059 if (scall == MAT_INITIAL_MATRIX) { 5060 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5061 } else { 5062 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5063 } 5064 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5065 PetscFunctionReturn(0); 5066 } 5067 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5068 if (scall == MAT_INITIAL_MATRIX) { 5069 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5070 } 5071 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5072 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5073 PetscFunctionReturn(0); 5074 } 5075 5076 /*@ 5077 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5078 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5079 with MatGetSize() 5080 5081 Not Collective 5082 5083 Input Parameters: 5084 + A - the matrix 5085 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5086 5087 Output Parameter: 5088 . 
A_loc - the local sequential matrix generated 5089 5090 Level: developer 5091 5092 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5093 5094 @*/ 5095 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5096 { 5097 PetscErrorCode ierr; 5098 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5099 Mat_SeqAIJ *mat,*a,*b; 5100 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5101 MatScalar *aa,*ba,*cam; 5102 PetscScalar *ca; 5103 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5104 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5105 PetscBool match; 5106 MPI_Comm comm; 5107 PetscMPIInt size; 5108 5109 PetscFunctionBegin; 5110 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5111 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5112 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5113 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5114 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5115 5116 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5117 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5118 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5119 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5120 aa = a->a; ba = b->a; 5121 if (scall == MAT_INITIAL_MATRIX) { 5122 if (size == 1) { 5123 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5124 PetscFunctionReturn(0); 5125 } 5126 5127 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5128 ci[0] = 0; 5129 for (i=0; i<am; i++) { 5130 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5131 } 5132 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5133 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5134 k = 0; 5135 for (i=0; i<am; i++) { 5136 ncols_o = bi[i+1] - bi[i]; 5137 ncols_d = ai[i+1] - ai[i]; 5138 /* off-diagonal portion of A */ 5139 for (jo=0; jo<ncols_o; jo++) { 5140 col = cmap[*bj]; 5141 if (col >= cstart) break; 5142 cj[k] = col; bj++; 5143 ca[k++] = *ba++; 5144 } 5145 /* diagonal portion of A */ 5146 for (j=0; j<ncols_d; j++) { 5147 cj[k] = cstart + *aj++; 5148 ca[k++] = *aa++; 5149 } 5150 /* off-diagonal portion of A */ 5151 for (j=jo; j<ncols_o; j++) { 5152 cj[k] = cmap[*bj++]; 5153 ca[k++] = *ba++; 5154 } 5155 } 5156 /* put together the new matrix */ 5157 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5158 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5159 /* Since these are PETSc arrays, change flags to free them as necessary. 
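     Setting free_a and free_ij below hands ownership of ci, cj, and ca to the new SeqAIJ matrix, so they are freed when A_loc is destroyed.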
*/ 5160 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5161 mat->free_a = PETSC_TRUE; 5162 mat->free_ij = PETSC_TRUE; 5163 mat->nonew = 0; 5164 } else if (scall == MAT_REUSE_MATRIX) { 5165 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5166 ci = mat->i; cj = mat->j; cam = mat->a; 5167 for (i=0; i<am; i++) { 5168 /* off-diagonal portion of A */ 5169 ncols_o = bi[i+1] - bi[i]; 5170 for (jo=0; jo<ncols_o; jo++) { 5171 col = cmap[*bj]; 5172 if (col >= cstart) break; 5173 *cam++ = *ba++; bj++; 5174 } 5175 /* diagonal portion of A */ 5176 ncols_d = ai[i+1] - ai[i]; 5177 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5178 /* off-diagonal portion of A */ 5179 for (j=jo; j<ncols_o; j++) { 5180 *cam++ = *ba++; bj++; 5181 } 5182 } 5183 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5184 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5185 PetscFunctionReturn(0); 5186 } 5187 5188 /*@C 5189 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5190 5191 Not Collective 5192 5193 Input Parameters: 5194 + A - the matrix 5195 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5196 - row, col - index sets of rows and columns to extract (or NULL) 5197 5198 Output Parameter: 5199 . A_loc - the local sequential matrix generated 5200 5201 Level: developer 5202 5203 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5204 5205 @*/ 5206 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5207 { 5208 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5209 PetscErrorCode ierr; 5210 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5211 IS isrowa,iscola; 5212 Mat *aloc; 5213 PetscBool match; 5214 5215 PetscFunctionBegin; 5216 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5217 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5218 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5219 if (!row) { 5220 start = A->rmap->rstart; end = A->rmap->rend; 5221 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5222 } else { 5223 isrowa = *row; 5224 } 5225 if (!col) { 5226 start = A->cmap->rstart; 5227 cmap = a->garray; 5228 nzA = a->A->cmap->n; 5229 nzB = a->B->cmap->n; 5230 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5231 ncols = 0; 5232 for (i=0; i<nzB; i++) { 5233 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5234 else break; 5235 } 5236 imark = i; 5237 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5238 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5239 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5240 } else { 5241 iscola = *col; 5242 } 5243 if (scall != MAT_INITIAL_MATRIX) { 5244 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5245 aloc[0] = *A_loc; 5246 } 5247 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5248 if (!col) { /* attach global id of condensed columns */ 5249 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5250 } 5251 *A_loc = aloc[0]; 5252 ierr = PetscFree(aloc);CHKERRQ(ierr); 5253 if (!row) { 5254 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5255 } 5256 if (!col) { 5257 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5258 } 5259 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5260 PetscFunctionReturn(0); 5261 } 5262 5263 /* 5264 * Destroy a mat that may be 
composed with PetscSF communication objects. 5265 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5266 * */ 5267 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5268 { 5269 PetscSF sf,osf; 5270 PetscErrorCode ierr; 5271 5272 PetscFunctionBegin; 5273 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5274 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5275 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5276 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5277 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5278 PetscFunctionReturn(0); 5279 } 5280 5281 /* 5282 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5283 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5284 * on a global size. 5285 * */ 5286 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5287 { 5288 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5289 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5290 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols; 5291 PetscSFNode *iremote,*oiremote; 5292 const PetscInt *lrowindices; 5293 PetscErrorCode ierr; 5294 PetscSF sf,osf; 5295 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5296 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5297 MPI_Comm comm; 5298 ISLocalToGlobalMapping mapping; 5299 5300 PetscFunctionBegin; 5301 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5302 /* plocalsize is the number of roots 5303 * nrows is the number of leaves 5304 * */ 5305 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5306 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5307 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5308 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5309 for (i=0;i<nrows;i++) { 5310 /* Find a remote index and an owner for a row 5311 * The row could be local or remote 5312 * */ 5313 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5314 iremote[i].index = lidx; 5315 iremote[i].rank = owner; 5316 } 5317 /* Create SF to communicate how many nonzero columns for each row */ 5318 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5319 /* SF will figure out the number of nonzero colunms for each row, and their 5320 * offsets 5321 * */ 5322 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5323 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5324 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5325 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5326 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5327 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5328 roffsets[0] = 0; 5329 roffsets[1] = 0; 5330 for (i=0;i<plocalsize;i++) { 5331 /* diag */ 5332 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5333 /* off diag */ 5334 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5335 /* compute offsets so that we relative location for each row */ 5336 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5337 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5338 } 5339 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5340 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5341 /* 'r' means root, and 'l' means leaf */ 5342 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5343 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5344 ierr = 
PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5345 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5346 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5347 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5348 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5349 dntotalcols = 0; 5350 ontotalcols = 0; 5351 for (i=0;i<nrows;i++) { 5352 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5353 /* diag */ 5354 dntotalcols += nlcols[i*2+0]; 5355 /* off diag */ 5356 ontotalcols += nlcols[i*2+1]; 5357 } 5358 /* We do not need to figure the right number of columns 5359 * since all the calculations will be done by going through the raw data 5360 * */ 5361 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,nrows,0,pnnz,P_oth);CHKERRQ(ierr); 5362 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5363 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5364 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5365 /* diag */ 5366 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5367 /* off diag */ 5368 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5369 /* diag */ 5370 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5371 /* off diag */ 5372 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5373 dntotalcols = 0; 5374 ontotalcols = 0; 5375 ntotalcols = 0; 5376 for (i=0;i<nrows;i++) { 5377 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5378 /* Set iremote for diag matrix */ 5379 for (j=0;j<nlcols[i*2+0];j++) { 5380 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5381 iremote[dntotalcols].rank = owner; 5382 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5383 ilocal[dntotalcols++] = ntotalcols++; 5384 } 5385 /* off diag */ 5386 for (j=0;j<nlcols[i*2+1];j++) { 5387 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5388 oiremote[ontotalcols].rank = owner; 5389 oilocal[ontotalcols++] = ntotalcols++; 5390 } 5391 } 5392 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5393 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5394 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5395 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5396 /* P serves as roots and P_oth is leaves 5397 * Diag matrix 5398 * */ 5399 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5400 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5401 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5402 5403 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5404 /* Off diag */ 5405 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5406 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5407 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5408 /* We operate on the matrix internal data for saving memory */ 5409 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5410 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5411 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5412 /* Convert to global indices for diag matrix */ 5413 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5414 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5415 /* We want P_oth store global indices */ 5416 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5417 /* Use memory scalable approach */ 5418 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5419 ierr = 
ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5420 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5421 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5422 /* Convert back to local indices */ 5423 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5424 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5425 nout = 0; 5426 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5427 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5428 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5429 /* Exchange values */ 5430 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5431 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5432 /* Stop PETSc from shrinking memory */ 5433 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5434 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5435 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5436 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5437 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5438 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5439 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5440 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5441 PetscFunctionReturn(0); 5442 } 5443 5444 /* 5445 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5446 * This supports MPIAIJ and MAIJ 5447 * */ 5448 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,MatReuse reuse,Mat *P_oth) 5449 { 5450 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5451 Mat_SeqAIJ *ao=(Mat_SeqAIJ*)(a->B)->data,*p_oth; 5452 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5453 IS rows; 5454 PetscHSetI ht; 5455 PetscInt i,htsize,*rowindices,off; 5456 MPI_Comm comm; 5457 PetscSF sf,osf; 5458 PetscErrorCode ierr; 5459 5460 PetscFunctionBegin; 5461 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5462 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5463 * and then create a submatrix (that often is an overlapping matrix) 5464 * */ 5465 if (reuse==MAT_INITIAL_MATRIX) { 5466 /* Use a hash table to figure out unique keys */ 5467 ierr = PetscHSetICreate(&ht);CHKERRQ(ierr); 5468 for (i=0;i<ao->i[a->B->rmap->n];i++) { 5469 /* Convert to global keys */ 5470 ierr = PetscHSetIAdd(ht,a->garray[ao->j[i]]);CHKERRQ(ierr); 5471 } 5472 ierr = PetscHSetIGetSize(ht,&htsize);CHKERRQ(ierr); 5473 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5474 off = 0; 5475 ierr = PetscHSetIGetElems(ht,&off,rowindices);CHKERRQ(ierr); 5476 ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr); 5477 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5478 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5479 /* In case, the matrix was already created but users want to recreate the matrix */ 5480 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5481 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5482 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5483 } else if (reuse==MAT_REUSE_MATRIX) { 5484 /* If matrix was already created, we simply update values using SF objects 5485 * that as attached to the matrix ealier. 
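   * The "diagsf" and "offdiagsf" PetscSF objects were composed with P_oth in MatCreateSeqSubMatrixWithRows_Private() and broadcast the diagonal and off-diagonal values of P directly into p_oth->a.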
5486 * */ 5487 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5488 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5489 if (!sf || !osf) { 5490 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5491 } 5492 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5493 /* Update values in place */ 5494 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5495 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5496 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5497 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5498 } else { 5499 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5500 } 5501 5502 PetscFunctionReturn(0); 5503 } 5504 5505 /*@C 5506 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5507 5508 Collective on Mat 5509 5510 Input Parameters: 5511 + A,B - the matrices in mpiaij format 5512 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5513 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5514 5515 Output Parameter: 5516 + rowb, colb - index sets of rows and columns of B to extract 5517 - B_seq - the sequential matrix generated 5518 5519 Level: developer 5520 5521 @*/ 5522 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5523 { 5524 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5525 PetscErrorCode ierr; 5526 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5527 IS isrowb,iscolb; 5528 Mat *bseq=NULL; 5529 5530 PetscFunctionBegin; 5531 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5532 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5533 } 5534 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5535 5536 if (scall == MAT_INITIAL_MATRIX) { 5537 start = A->cmap->rstart; 5538 cmap = a->garray; 5539 nzA = a->A->cmap->n; 5540 nzB = a->B->cmap->n; 5541 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5542 ncols = 0; 5543 for (i=0; i<nzB; i++) { /* row < local row index */ 5544 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5545 else break; 5546 } 5547 imark = i; 5548 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5549 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5550 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5551 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5552 } else { 5553 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5554 isrowb = *rowb; iscolb = *colb; 5555 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5556 bseq[0] = *B_seq; 5557 } 5558 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5559 *B_seq = bseq[0]; 5560 ierr = PetscFree(bseq);CHKERRQ(ierr); 5561 if (!rowb) { 5562 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5563 } else { 5564 *rowb = isrowb; 5565 } 5566 if (!colb) { 5567 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5568 } else { 5569 *colb = iscolb; 5570 } 5571 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5572 PetscFunctionReturn(0); 5573 } 5574 5575 /* 5576 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5577 of the 
OFF-DIAGONAL portion of local A 5578 5579 Collective on Mat 5580 5581 Input Parameters: 5582 + A,B - the matrices in mpiaij format 5583 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5584 5585 Output Parameter: 5586 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5587 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5588 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5589 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5590 5591 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5592 for this matrix. This is not desirable.. 5593 5594 Level: developer 5595 5596 */ 5597 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5598 { 5599 PetscErrorCode ierr; 5600 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5601 Mat_SeqAIJ *b_oth; 5602 VecScatter ctx; 5603 MPI_Comm comm; 5604 const PetscMPIInt *rprocs,*sprocs; 5605 const PetscInt *srow,*rstarts,*sstarts; 5606 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5607 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5608 PetscScalar *b_otha,*bufa,*bufA,*vals; 5609 MPI_Request *rwaits = NULL,*swaits = NULL; 5610 MPI_Status rstatus; 5611 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5612 5613 PetscFunctionBegin; 5614 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5615 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5616 5617 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5618 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5619 } 5620 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5622 5623 if (size == 1) { 5624 startsj_s = NULL; 5625 bufa_ptr = NULL; 5626 *B_oth = NULL; 5627 PetscFunctionReturn(0); 5628 } 5629 5630 ctx = a->Mvctx; 5631 tag = ((PetscObject)ctx)->tag; 5632 5633 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5634 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5635 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5636 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5637 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5638 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5639 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5640 5641 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5642 if (scall == MAT_INITIAL_MATRIX) { 5643 /* i-array */ 5644 /*---------*/ 5645 /* post receives */ 5646 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5647 for (i=0; i<nrecvs; i++) { 5648 rowlen = rvalues + rstarts[i]*rbs; 5649 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5650 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5651 } 5652 5653 /* pack the 
outgoing message */ 5654 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5655 5656 sstartsj[0] = 0; 5657 rstartsj[0] = 0; 5658 len = 0; /* total length of j or a array to be sent */ 5659 if (nsends) { 5660 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5661 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5662 } 5663 for (i=0; i<nsends; i++) { 5664 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5665 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5666 for (j=0; j<nrows; j++) { 5667 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5668 for (l=0; l<sbs; l++) { 5669 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5670 5671 rowlen[j*sbs+l] = ncols; 5672 5673 len += ncols; 5674 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5675 } 5676 k++; 5677 } 5678 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5679 5680 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5681 } 5682 /* recvs and sends of i-array are completed */ 5683 i = nrecvs; 5684 while (i--) { 5685 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5686 } 5687 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5688 ierr = PetscFree(svalues);CHKERRQ(ierr); 5689 5690 /* allocate buffers for sending j and a arrays */ 5691 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5692 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5693 5694 /* create i-array of B_oth */ 5695 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5696 5697 b_othi[0] = 0; 5698 len = 0; /* total length of j or a array to be received */ 5699 k = 0; 5700 for (i=0; i<nrecvs; i++) { 5701 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5702 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5703 for (j=0; j<nrows; j++) { 5704 b_othi[k+1] = b_othi[k] + rowlen[j]; 5705 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5706 k++; 5707 } 5708 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5709 } 5710 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5711 5712 /* allocate space for j and a arrrays of B_oth */ 5713 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5714 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5715 5716 /* j-array */ 5717 /*---------*/ 5718 /* post receives of j-array */ 5719 for (i=0; i<nrecvs; i++) { 5720 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5721 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5722 } 5723 5724 /* pack the outgoing message j-array */ 5725 if (nsends) k = sstarts[0]; 5726 for (i=0; i<nsends; i++) { 5727 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5728 bufJ = bufj+sstartsj[i]; 5729 for (j=0; j<nrows; j++) { 5730 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5731 for (ll=0; ll<sbs; ll++) { 5732 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5733 for (l=0; l<ncols; l++) { 5734 *bufJ++ = cols[l]; 5735 } 5736 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5737 } 5738 } 5739 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5740 } 5741 5742 /* recvs and sends of j-array are completed */ 5743 i = nrecvs; 5744 while (i--) { 5745 ierr = 
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* caller did not ask to keep the value buffer, so release it here */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
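
/*
   Illustrative sketch (kept as a comment, not compiled) of how MatGetBrowsOfAoCols_MPIAIJ() is typically
   driven by a caller such as a parallel matrix-matrix product: one call with MAT_INITIAL_MATRIX builds the
   sequential matrix and saves the communication buffers, later calls with MAT_REUSE_MATRIX only refresh the
   numerical values. The variable names A, P, startsj_s, startsj_r, bufa, and P_oth are placeholders for this
   sketch, and the cleanup shown assumes the ownership implied by the implementation above.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat       P_oth      = NULL;

     // symbolic stage: gather the rows of P matching nonzero off-diagonal columns of A
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&P_oth);CHKERRQ(ierr);

     // numeric stage(s): P changed values but not structure, so reuse the saved buffers
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&P_oth);CHKERRQ(ierr);

     // the caller owns the saved buffers (allocated with PetscMalloc2/PetscMalloc1 above) and the matrix
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&P_oth);CHKERRQ(ierr);
*/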

/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
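
/*
   Minimal usage sketch (as a comment, not compiled) for MatGetCommunicationStructs(), assuming A is an
   assembled MATMPIAIJ matrix; the names A, lvec, colmap, and mvctx are local to this example. Note that
   the type of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE.

     Vec        lvec;
     VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
     // lvec, colmap, and mvctx are borrowed references owned by the matrix; do not destroy or free them
*/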

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/*
    Computes (B'*A')' since computing B*A directly is untenable

             n              p              p
        (         )    (         )    (         )
      m (    A    ) * n(    B    ) = m(    C    )
        (         )    (         )    (         )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateAIJ()
M*/
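
/*
   Minimal sketch (as a comment, not compiled here) of creating a MATMPIAIJ matrix explicitly; in practice
   most codes call MatCreateAIJ() or MatSetFromOptions() with -mat_type mpiaij instead. The global size 100
   and the preallocation counts below are arbitrary placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     // ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/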

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based.

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set. (An illustrative usage sketch is given in a
       comment following this routine.)

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
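
/*
   Illustrative usage sketch (as a comment, not compiled) for MatCreateMPIAIJWithSplitArrays(). The arrays are
   application-provided placeholders; as the implementation above suggests, j[] holds column indices local to
   the diagonal block (0 .. n-1) while oj[] holds global column indices, and none of the six arrays are copied,
   so they must outlive the matrix.

     PetscInt    m,n;                // local row/column sizes chosen by the application
     PetscInt    *i,*j,*oi,*oj;      // CSR row offsets (m+1 entries each) and column indices, filled by the application
     PetscScalar *a,*oa;             // values for the diagonal and off-diagonal blocks
     Mat         A;

     // ... application fills m, n, and the six split-CSR arrays ...
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     // ... use A ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     // only after the destroy may the caller free i, j, a, oi, oj, and oa
*/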

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a; /* refresh the values array before computing ap2, since MatDisAssemble_MPIAIJ() replaced B */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}