1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = 
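/*
   Example (sketch): creating a MATAIJ matrix as the manual pages above recommend,
   calling both preallocation routines so the same code works with one or many MPI
   processes.  The global size N and the per-row estimates (5 diagonal and 2
   off-diagonal nonzeros per row) are illustrative only.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/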
(Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
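   Call sketch (variable names hypothetical, error checking omitted): given the
   sequential matrix gmat and a caller-chosen local row count mloc,

     ierr = MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_INITIAL_MATRIX,&pmat);
     ... later, after only the numerical values of gmat have changed ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_REUSE_MATRIX,&pmat);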
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
(gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 
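   Lookup sketch (mirroring the use in MatSetValues_MPIAIJ() below): to map a
   global column index gcol to the local column index lcol of the off-diagonal
   part,

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                            the table stores indices shifted by one
   #else
     lcol = aij->colmap[gcol] - 1;      an entry of zero means the column is absent
   #endif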
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
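/*
   Usage sketch for MatSetValuesRow() with an MPIAIJ matrix (this routine):
   grow is a global row index owned by this process and vals supplies one value
   per preallocated slot of that row, ordered by increasing global column number
   (off-diagonal entries left of the diagonal block, then the diagonal block, then
   the rest), matching the three PetscMemcpy() calls below.

     ierr = MatSetValuesRow(A,grow,vals);CHKERRQ(ierr);
*/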
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* 
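               The matrix was just disassembled: MatDisAssemble_MPIAIJ() rebuilds B with
               global column indices (and may allocate a new B), so the cached pointers
               below are stale.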
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 634 PetscFunctionBegin; 635 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 636 637 ierr = 
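/*
   If the caller can guarantee that it never generates entries for rows owned by
   another process, the stash communication below can be skipped entirely (that is
   what the early return above checks), e.g.

     ierr = MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   whereas MAT_IGNORE_OFF_PROC_ENTRIES silently drops such entries instead.
*/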
MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 638 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 639 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 645 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 646 { 647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 648 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 649 PetscErrorCode ierr; 650 PetscMPIInt n; 651 PetscInt i,j,rstart,ncols,flg; 652 PetscInt *row,*col; 653 PetscBool other_disassembled; 654 PetscScalar *val; 655 656 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 657 658 PetscFunctionBegin; 659 if (!aij->donotstash && !mat->nooffprocentries) { 660 while (1) { 661 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 662 if (!flg) break; 663 664 for (i=0; i<n; ) { 665 /* Now identify the consecutive vals belonging to the same row */ 666 for (j=i,rstart=row[j]; j<n; j++) { 667 if (row[j] != rstart) break; 668 } 669 if (j < n) ncols = j-i; 670 else ncols = n-i; 671 /* Now assemble all these values with a single function call */ 672 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 673 674 i = j; 675 } 676 } 677 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 678 } 679 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 681 682 /* determine if any processor has disassembled, if so we must 683 also disassemble ourselfs, in order that we may reassemble. */ 684 /* 685 if nonzero structure of submatrix B cannot change then we know that 686 no processor disassembled thus we can skip this stuff 687 */ 688 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 689 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 690 if (mat->was_assembled && !other_disassembled) { 691 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 692 } 693 } 694 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 695 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 696 } 697 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 698 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 699 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 700 701 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 702 703 aij->rowvalues = 0; 704 705 /* used by MatAXPY() */ 706 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 707 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 708 709 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 710 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 711 712 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 713 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 714 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 715 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 716 } 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 722 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 723 { 724 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 725 PetscErrorCode ierr; 726 
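/*
   Example (sketch): the usual caller-side cycle that exercises MatSetValues_MPIAIJ()
   and MatAssemblyBegin/End_MPIAIJ() above.  The indices are global and may refer to
   rows owned by another process; such entries are stashed and then communicated
   during assembly.  The values shown are illustrative only.

     PetscInt    row = 0, col = 0;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/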
727 PetscFunctionBegin; 728 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 729 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 730 PetscFunctionReturn(0); 731 } 732 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatZeroRows_MPIAIJ" 735 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 736 { 737 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 738 PetscInt *owners = A->rmap->range; 739 PetscInt n = A->rmap->n; 740 PetscSF sf; 741 PetscInt *lrows; 742 PetscSFNode *rrows; 743 PetscInt r, p = 0, len = 0; 744 PetscErrorCode ierr; 745 746 PetscFunctionBegin; 747 /* Create SF where leaves are input rows and roots are owned rows */ 748 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 749 for (r = 0; r < n; ++r) lrows[r] = -1; 750 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 751 for (r = 0; r < N; ++r) { 752 const PetscInt idx = rows[r]; 753 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 754 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 755 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 756 } 757 if (A->nooffproczerorows) { 758 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 759 lrows[len++] = idx - owners[p]; 760 } else { 761 rrows[r].rank = p; 762 rrows[r].index = rows[r] - owners[p]; 763 } 764 } 765 if (!A->nooffproczerorows) { 766 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 767 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 768 /* Collect flags for rows to be zeroed */ 769 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 770 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 771 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 772 /* Compress and put in row numbers */ 773 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 774 } 775 /* fix right hand side if needed */ 776 if (x && b) { 777 const PetscScalar *xx; 778 PetscScalar *bb; 779 780 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 781 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 782 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 783 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 784 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 785 } 786 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 787 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 788 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 789 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 790 } else if (diag != 0.0) { 791 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 792 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 793 for (r = 0; r < len; ++r) { 794 const PetscInt row = lrows[r] + A->rmap->rstart; 795 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 796 } 797 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 798 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 799 } else { 800 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, 
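/*
   Example (sketch): applying Dirichlet boundary conditions through the public
   interface that dispatches to MatZeroRows_MPIAIJ() above.  rows[] holds global
   row indices (they need not be locally owned unless MAT_NO_OFF_PROC_ZERO_ROWS
   was set); passing x and b also adjusts the right-hand side to b = diag*x in
   those rows.  nrows, rows, x and b are assumed to exist in the caller.

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/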
NULL);CHKERRQ(ierr); 801 } 802 ierr = PetscFree(lrows);CHKERRQ(ierr); 803 804 /* only change matrix nonzero state if pattern was allowed to be changed */ 805 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 806 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 807 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 808 } 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 815 { 816 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 817 PetscErrorCode ierr; 818 PetscMPIInt n = A->rmap->n; 819 PetscInt i,j,r,m,p = 0,len = 0; 820 PetscInt *lrows,*owners = A->rmap->range; 821 PetscSFNode *rrows; 822 PetscSF sf; 823 const PetscScalar *xx; 824 PetscScalar *bb,*mask; 825 Vec xmask,lmask; 826 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 827 const PetscInt *aj, *ii,*ridx; 828 PetscScalar *aa; 829 830 PetscFunctionBegin; 831 /* Create SF where leaves are input rows and roots are owned rows */ 832 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 833 for (r = 0; r < n; ++r) lrows[r] = -1; 834 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 835 for (r = 0; r < N; ++r) { 836 const PetscInt idx = rows[r]; 837 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 838 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 839 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 840 } 841 rrows[r].rank = p; 842 rrows[r].index = rows[r] - owners[p]; 843 } 844 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 845 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 846 /* Collect flags for rows to be zeroed */ 847 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 848 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 849 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 850 /* Compress and put in row numbers */ 851 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 852 /* zero diagonal part of matrix */ 853 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 854 /* handle off diagonal part of matrix */ 855 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 856 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 857 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 858 for (i=0; i<len; i++) bb[lrows[i]] = 1; 859 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 860 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 863 if (x) { 864 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 867 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 868 } 869 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 870 /* remove zeroed rows of off diagonal matrix */ 871 ii = aij->i; 872 for (i=0; i<len; i++) { 873 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 874 } 875 /* loop over all elements of off process part 
of matrix zeroing removed columns*/ 876 if (aij->compressedrow.use) { 877 m = aij->compressedrow.nrows; 878 ii = aij->compressedrow.i; 879 ridx = aij->compressedrow.rindex; 880 for (i=0; i<m; i++) { 881 n = ii[i+1] - ii[i]; 882 aj = aij->j + ii[i]; 883 aa = aij->a + ii[i]; 884 885 for (j=0; j<n; j++) { 886 if (PetscAbsScalar(mask[*aj])) { 887 if (b) bb[*ridx] -= *aa*xx[*aj]; 888 *aa = 0.0; 889 } 890 aa++; 891 aj++; 892 } 893 ridx++; 894 } 895 } else { /* do not use compressed row format */ 896 m = l->B->rmap->n; 897 for (i=0; i<m; i++) { 898 n = ii[i+1] - ii[i]; 899 aj = aij->j + ii[i]; 900 aa = aij->a + ii[i]; 901 for (j=0; j<n; j++) { 902 if (PetscAbsScalar(mask[*aj])) { 903 if (b) bb[i] -= *aa*xx[*aj]; 904 *aa = 0.0; 905 } 906 aa++; 907 aj++; 908 } 909 } 910 } 911 if (x) { 912 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 913 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 914 } 915 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 916 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 917 ierr = PetscFree(lrows);CHKERRQ(ierr); 918 919 /* only change matrix nonzero state if pattern was allowed to be changed */ 920 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 921 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 922 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatMult_MPIAIJ" 929 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 930 { 931 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 932 PetscErrorCode ierr; 933 PetscInt nt; 934 935 PetscFunctionBegin; 936 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 937 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 938 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 939 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 940 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 947 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 948 { 949 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 #undef __FUNCT__ 958 #define __FUNCT__ "MatMultAdd_MPIAIJ" 959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 960 { 961 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 967 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 974 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 975 { 976 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 977 PetscErrorCode ierr; 978 PetscBool merged; 979 980 PetscFunctionBegin; 981 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 982 /* do nondiagonal part */ 983 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 984 if (!merged) { 985 /* send 
it on its way */ 986 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 987 /* do local part */ 988 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 989 /* receive remote parts: note this assumes the values are not actually */ 990 /* added in yy until the next line, */ 991 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 992 } else { 993 /* do local part */ 994 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 995 /* send it on its way */ 996 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 /* values actually were received in the Begin() but we need to call this nop */ 998 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1005 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1006 { 1007 MPI_Comm comm; 1008 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1009 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1010 IS Me,Notme; 1011 PetscErrorCode ierr; 1012 PetscInt M,N,first,last,*notme,i; 1013 PetscMPIInt size; 1014 1015 PetscFunctionBegin; 1016 /* Easy test: symmetric diagonal block */ 1017 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1018 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1019 if (!*f) PetscFunctionReturn(0); 1020 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1022 if (size == 1) PetscFunctionReturn(0); 1023 1024 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1025 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1026 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1027 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1028 for (i=0; i<first; i++) notme[i] = i; 1029 for (i=last; i<M; i++) notme[i-last+first] = i; 1030 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1031 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1032 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1033 Aoff = Aoffs[0]; 1034 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1035 Boff = Boffs[0]; 1036 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1037 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1038 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1039 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1040 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1041 ierr = PetscFree(notme);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1047 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1048 { 1049 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1050 PetscErrorCode ierr; 1051 1052 PetscFunctionBegin; 1053 /* do nondiagonal part */ 1054 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1055 /* send it on its way */ 1056 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1057 /* do local part */ 1058 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1059 /* receive remote parts */ 1060 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 /* 1065 This only works correctly for square matrices where the 
subblock A->A is the 1066 diagonal block 1067 */ 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1070 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1071 { 1072 PetscErrorCode ierr; 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 1075 PetscFunctionBegin; 1076 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1077 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1078 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatScale_MPIAIJ" 1084 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1085 { 1086 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1087 PetscErrorCode ierr; 1088 1089 PetscFunctionBegin; 1090 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1091 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatDestroy_Redundant" 1097 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1098 { 1099 PetscErrorCode ierr; 1100 Mat_Redundant *redund = *redundant; 1101 PetscInt i; 1102 1103 PetscFunctionBegin; 1104 *redundant = NULL; 1105 if (redund){ 1106 if (redund->matseq) { /* via MatGetSubMatrices() */ 1107 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1108 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1109 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1110 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1111 } else { 1112 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1113 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1114 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1115 for (i=0; i<redund->nrecvs; i++) { 1116 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1117 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1118 } 1119 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1120 } 1121 1122 if (redund->psubcomm) { 1123 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1124 } 1125 ierr = PetscFree(redund);CHKERRQ(ierr); 1126 } 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatDestroy_MPIAIJ" 1132 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1133 { 1134 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 #if defined(PETSC_USE_LOG) 1139 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1140 #endif 1141 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1142 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1143 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1144 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1145 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1146 #if defined(PETSC_USE_CTABLE) 1147 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1148 #else 1149 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1150 #endif 1151 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1152 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1153 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1154 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1155 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1156 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1157 1158 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1159 ierr = 
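/*
   The PetscObjectComposeFunction(...,NULL) calls below detach the type-specific
   methods that were attached when the matrix was created; this composition
   mechanism is how, for example, MatMPIAIJSetPreallocation() reaches its MPIAIJ
   implementation.  A caller-side query sketch (variable names hypothetical):

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
     ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",&f);CHKERRQ(ierr);
     if (f) {ierr = (*f)(mat,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);}
*/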
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1161 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1172 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1173 { 1174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1175 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1176 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1177 PetscErrorCode ierr; 1178 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1179 int fd; 1180 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1181 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1182 PetscScalar *column_values; 1183 PetscInt message_count,flowcontrolcount; 1184 FILE *file; 1185 1186 PetscFunctionBegin; 1187 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1188 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1189 nz = A->nz + B->nz; 1190 if (!rank) { 1191 header[0] = MAT_FILE_CLASSID; 1192 header[1] = mat->rmap->N; 1193 header[2] = mat->cmap->N; 1194 1195 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1196 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1197 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1198 /* get largest number of rows any processor has */ 1199 rlen = mat->rmap->n; 1200 range = mat->rmap->range; 1201 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1202 } else { 1203 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1204 rlen = mat->rmap->n; 1205 } 1206 1207 /* load up the local row counts */ 1208 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1209 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1210 1211 /* store the row lengths to the file */ 1212 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1213 if (!rank) { 1214 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1215 for (i=1; i<size; i++) { 1216 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1217 rlen = range[i+1] - range[i]; 1218 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1220 } 1221 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1222 } else { 1223 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1224 ierr = 
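/*
   Example (sketch): the caller-side path that reaches MatView_MPIAIJ_Binary();
   the file name is illustrative.  A matrix written this way can be read back with
   MatLoad() on any number of processes.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/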
MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1226 } 1227 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1228 1229 /* load up the local column indices */ 1230 nzmax = nz; /* th processor needs space a largest processor needs */ 1231 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1233 cnt = 0; 1234 for (i=0; i<mat->rmap->n; i++) { 1235 for (j=B->i[i]; j<B->i[i+1]; j++) { 1236 if ((col = garray[B->j[j]]) > cstart) break; 1237 column_indices[cnt++] = col; 1238 } 1239 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1240 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1241 } 1242 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1243 1244 /* store the column indices to the file */ 1245 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1246 if (!rank) { 1247 MPI_Status status; 1248 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1249 for (i=1; i<size; i++) { 1250 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1251 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1252 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1253 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1254 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1255 } 1256 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1257 } else { 1258 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1259 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1260 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1261 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1262 } 1263 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1264 1265 /* load up the local column values */ 1266 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1267 cnt = 0; 1268 for (i=0; i<mat->rmap->n; i++) { 1269 for (j=B->i[i]; j<B->i[i+1]; j++) { 1270 if (garray[B->j[j]] > cstart) break; 1271 column_values[cnt++] = B->a[j]; 1272 } 1273 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1274 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1275 } 1276 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1277 1278 /* store the column values to the file */ 1279 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1280 if (!rank) { 1281 MPI_Status status; 1282 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1283 for (i=1; i<size; i++) { 1284 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1285 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1286 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = 
%D",nz,nzmax); 1287 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1288 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1289 } 1290 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1291 } else { 1292 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1293 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1294 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1295 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1296 } 1297 ierr = PetscFree(column_values);CHKERRQ(ierr); 1298 1299 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1300 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #include <petscdraw.h> 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1307 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1308 { 1309 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1310 PetscErrorCode ierr; 1311 PetscMPIInt rank = aij->rank,size = aij->size; 1312 PetscBool isdraw,iascii,isbinary; 1313 PetscViewer sviewer; 1314 PetscViewerFormat format; 1315 1316 PetscFunctionBegin; 1317 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1318 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1319 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1320 if (iascii) { 1321 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1322 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1323 MatInfo info; 1324 PetscBool inodes; 1325 1326 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1327 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1328 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1330 if (!inodes) { 1331 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1332 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1333 } else { 1334 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1335 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1336 } 1337 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1338 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1339 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1340 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1341 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1342 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1343 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1344 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1347 PetscInt inodecount,inodelimit,*inodes; 1348 ierr 
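/*
   Example (sketch): requesting the per-process report produced in the
   PETSC_VIEWER_ASCII_INFO_DETAIL branch above; A is an assembled matrix in the
   caller.  The equivalent command-line option name may vary between PETSc versions.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/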
= MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1349 if (inodes) { 1350 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1351 } else { 1352 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1353 } 1354 PetscFunctionReturn(0); 1355 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1356 PetscFunctionReturn(0); 1357 } 1358 } else if (isbinary) { 1359 if (size == 1) { 1360 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1361 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1362 } else { 1363 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1364 } 1365 PetscFunctionReturn(0); 1366 } else if (isdraw) { 1367 PetscDraw draw; 1368 PetscBool isnull; 1369 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1370 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1371 } 1372 1373 { 1374 /* assemble the entire matrix onto first processor. */ 1375 Mat A; 1376 Mat_SeqAIJ *Aloc; 1377 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1378 MatScalar *a; 1379 const char *matname; 1380 1381 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1382 if (!rank) { 1383 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1384 } else { 1385 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1386 } 1387 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1388 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1389 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1390 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1391 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1392 1393 /* copy over the A part */ 1394 Aloc = (Mat_SeqAIJ*)aij->A->data; 1395 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1396 row = mat->rmap->rstart; 1397 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1398 for (i=0; i<m; i++) { 1399 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1400 row++; 1401 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1402 } 1403 aj = Aloc->j; 1404 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1405 1406 /* copy over the B part */ 1407 Aloc = (Mat_SeqAIJ*)aij->B->data; 1408 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1409 row = mat->rmap->rstart; 1410 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1411 ct = cols; 1412 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1413 for (i=0; i<m; i++) { 1414 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1415 row++; 1416 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1417 } 1418 ierr = PetscFree(ct);CHKERRQ(ierr); 1419 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1420 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1421 /* 1422 Everyone has to call to draw the matrix since the graphics waits are 1423 synchronized across all processors that share the PetscDraw object 1424 */ 1425 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1426 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1427 if (!rank) { 1428 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1429 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1430 } 1431 ierr = 
PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1432 ierr = MatDestroy(&A);CHKERRQ(ierr); 1433 } 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatView_MPIAIJ" 1439 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1440 { 1441 PetscErrorCode ierr; 1442 PetscBool iascii,isdraw,issocket,isbinary; 1443 1444 PetscFunctionBegin; 1445 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1446 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1447 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1448 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1449 if (iascii || isdraw || isbinary || issocket) { 1450 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatSOR_MPIAIJ" 1457 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1458 { 1459 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1460 PetscErrorCode ierr; 1461 Vec bb1 = 0; 1462 PetscBool hasop; 1463 1464 PetscFunctionBegin; 1465 if (flag == SOR_APPLY_UPPER) { 1466 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1471 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1472 } 1473 1474 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1477 its--; 1478 } 1479 1480 while (its--) { 1481 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1482 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1483 1484 /* update rhs: bb1 = bb - B*x */ 1485 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1486 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1487 1488 /* local sweep */ 1489 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1490 } 1491 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1492 if (flag & SOR_ZERO_INITIAL_GUESS) { 1493 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1494 its--; 1495 } 1496 while (its--) { 1497 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1499 1500 /* update rhs: bb1 = bb - B*x */ 1501 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1502 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1503 1504 /* local sweep */ 1505 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1506 } 1507 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1508 if (flag & SOR_ZERO_INITIAL_GUESS) { 1509 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1510 its--; 1511 } 1512 while (its--) { 1513 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 1516 /* update rhs: bb1 = bb - B*x */ 1517 ierr = 
VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1518 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1519 1520 /* local sweep */ 1521 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1522 } 1523 } else if (flag & SOR_EISENSTAT) { 1524 Vec xx1; 1525 1526 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1527 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1528 1529 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1531 if (!mat->diag) { 1532 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1533 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1534 } 1535 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1536 if (hasop) { 1537 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1538 } else { 1539 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1540 } 1541 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1542 1543 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1544 1545 /* local sweep */ 1546 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1547 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1548 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1549 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1550 1551 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatPermute_MPIAIJ" 1557 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1558 { 1559 Mat aA,aB,Aperm; 1560 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1561 PetscScalar *aa,*ba; 1562 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1563 PetscSF rowsf,sf; 1564 IS parcolp = NULL; 1565 PetscBool done; 1566 PetscErrorCode ierr; 1567 1568 PetscFunctionBegin; 1569 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1570 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1571 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1572 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1573 1574 /* Invert row permutation to find out where my rows should go */ 1575 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1576 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1577 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1578 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1579 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1580 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1581 1582 /* Invert column permutation to find out where my columns should go */ 1583 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1584 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1585 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1586 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1587 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1588 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1589 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1590 1591 ierr = 
ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1592 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1593 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1594 1595 /* Find out where my gcols should go */ 1596 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1597 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1598 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1599 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1600 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1601 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1602 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1603 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1604 1605 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1606 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1607 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1608 for (i=0; i<m; i++) { 1609 PetscInt row = rdest[i],rowner; 1610 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1611 for (j=ai[i]; j<ai[i+1]; j++) { 1612 PetscInt cowner,col = cdest[aj[j]]; 1613 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1614 if (rowner == cowner) dnnz[i]++; 1615 else onnz[i]++; 1616 } 1617 for (j=bi[i]; j<bi[i+1]; j++) { 1618 PetscInt cowner,col = gcdest[bj[j]]; 1619 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1620 if (rowner == cowner) dnnz[i]++; 1621 else onnz[i]++; 1622 } 1623 } 1624 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1625 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1626 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1627 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1628 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1629 1630 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1631 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1632 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1633 for (i=0; i<m; i++) { 1634 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1635 PetscInt j0,rowlen; 1636 rowlen = ai[i+1] - ai[i]; 1637 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1638 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1639 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1640 } 1641 rowlen = bi[i+1] - bi[i]; 1642 for (j0=j=0; j<rowlen; j0=j) { 1643 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1644 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1645 } 1646 } 1647 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1649 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1650 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1651 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1652 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1653 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1654 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1655 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1656 if (parcolp) {ierr = 
ISDestroy(&colp);CHKERRQ(ierr);} 1657 *B = Aperm; 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1663 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1664 { 1665 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1666 Mat A = mat->A,B = mat->B; 1667 PetscErrorCode ierr; 1668 PetscReal isend[5],irecv[5]; 1669 1670 PetscFunctionBegin; 1671 info->block_size = 1.0; 1672 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1673 1674 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1675 isend[3] = info->memory; isend[4] = info->mallocs; 1676 1677 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1678 1679 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1680 isend[3] += info->memory; isend[4] += info->mallocs; 1681 if (flag == MAT_LOCAL) { 1682 info->nz_used = isend[0]; 1683 info->nz_allocated = isend[1]; 1684 info->nz_unneeded = isend[2]; 1685 info->memory = isend[3]; 1686 info->mallocs = isend[4]; 1687 } else if (flag == MAT_GLOBAL_MAX) { 1688 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1689 1690 info->nz_used = irecv[0]; 1691 info->nz_allocated = irecv[1]; 1692 info->nz_unneeded = irecv[2]; 1693 info->memory = irecv[3]; 1694 info->mallocs = irecv[4]; 1695 } else if (flag == MAT_GLOBAL_SUM) { 1696 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1697 1698 info->nz_used = irecv[0]; 1699 info->nz_allocated = irecv[1]; 1700 info->nz_unneeded = irecv[2]; 1701 info->memory = irecv[3]; 1702 info->mallocs = irecv[4]; 1703 } 1704 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1705 info->fill_ratio_needed = 0; 1706 info->factor_mallocs = 0; 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatSetOption_MPIAIJ" 1712 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1713 { 1714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 switch (op) { 1719 case MAT_NEW_NONZERO_LOCATIONS: 1720 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1721 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1722 case MAT_KEEP_NONZERO_PATTERN: 1723 case MAT_NEW_NONZERO_LOCATION_ERR: 1724 case MAT_USE_INODES: 1725 case MAT_IGNORE_ZERO_ENTRIES: 1726 MatCheckPreallocated(A,1); 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1729 break; 1730 case MAT_ROW_ORIENTED: 1731 a->roworiented = flg; 1732 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1735 break; 1736 case MAT_NEW_DIAGONALS: 1737 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1738 break; 1739 case MAT_IGNORE_OFF_PROC_ENTRIES: 1740 a->donotstash = flg; 1741 break; 1742 case MAT_SPD: 1743 A->spd_set = PETSC_TRUE; 1744 A->spd = flg; 1745 if (flg) { 1746 A->symmetric = PETSC_TRUE; 1747 A->structurally_symmetric = PETSC_TRUE; 1748 A->symmetric_set = PETSC_TRUE; 1749 A->structurally_symmetric_set = PETSC_TRUE; 1750 } 1751 break; 1752 case MAT_SYMMETRIC: 1753 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1754 break; 1755 case MAT_STRUCTURALLY_SYMMETRIC: 1756 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1757 break; 1758 case MAT_HERMITIAN: 1759 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1760 break; 1761 case MAT_SYMMETRY_ETERNAL: 1762 ierr = 
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1763 break; 1764 default: 1765 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatGetRow_MPIAIJ" 1772 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1773 { 1774 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1775 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1776 PetscErrorCode ierr; 1777 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1778 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1779 PetscInt *cmap,*idx_p; 1780 1781 PetscFunctionBegin; 1782 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1783 mat->getrowactive = PETSC_TRUE; 1784 1785 if (!mat->rowvalues && (idx || v)) { 1786 /* 1787 allocate enough space to hold information from the longest row. 1788 */ 1789 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1790 PetscInt max = 1,tmp; 1791 for (i=0; i<matin->rmap->n; i++) { 1792 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1793 if (max < tmp) max = tmp; 1794 } 1795 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1796 } 1797 1798 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1799 lrow = row - rstart; 1800 1801 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1802 if (!v) {pvA = 0; pvB = 0;} 1803 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1804 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1805 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1806 nztot = nzA + nzB; 1807 1808 cmap = mat->garray; 1809 if (v || idx) { 1810 if (nztot) { 1811 /* Sort by increasing column numbers, assuming A and B already sorted */ 1812 PetscInt imark = -1; 1813 if (v) { 1814 *v = v_p = mat->rowvalues; 1815 for (i=0; i<nzB; i++) { 1816 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1817 else break; 1818 } 1819 imark = i; 1820 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1821 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1822 } 1823 if (idx) { 1824 *idx = idx_p = mat->rowindices; 1825 if (imark > -1) { 1826 for (i=0; i<imark; i++) { 1827 idx_p[i] = cmap[cworkB[i]]; 1828 } 1829 } else { 1830 for (i=0; i<nzB; i++) { 1831 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1832 else break; 1833 } 1834 imark = i; 1835 } 1836 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1837 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1838 } 1839 } else { 1840 if (idx) *idx = 0; 1841 if (v) *v = 0; 1842 } 1843 } 1844 *nz = nztot; 1845 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1846 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1852 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1853 { 1854 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1855 1856 PetscFunctionBegin; 1857 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1858 aij->getrowactive = PETSC_FALSE; 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatNorm_MPIAIJ" 1864 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1865 { 1866 
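  /* In parallel, norms are assembled from the local diagonal block aij->A and off-diagonal block aij->B:
     Frobenius sums |a_ij|^2 locally and takes the square root of the reduced global sum, the 1-norm
     accumulates per-column absolute sums into a work array of global column length and reduces with
     MPIU_SUM before taking the maximum, and the infinity norm reduces the local maximum row sum with MPIU_MAX. */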
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1867 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1868 PetscErrorCode ierr; 1869 PetscInt i,j,cstart = mat->cmap->rstart; 1870 PetscReal sum = 0.0; 1871 MatScalar *v; 1872 1873 PetscFunctionBegin; 1874 if (aij->size == 1) { 1875 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1876 } else { 1877 if (type == NORM_FROBENIUS) { 1878 v = amat->a; 1879 for (i=0; i<amat->nz; i++) { 1880 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1881 } 1882 v = bmat->a; 1883 for (i=0; i<bmat->nz; i++) { 1884 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1885 } 1886 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1887 *norm = PetscSqrtReal(*norm); 1888 } else if (type == NORM_1) { /* max column norm */ 1889 PetscReal *tmp,*tmp2; 1890 PetscInt *jj,*garray = aij->garray; 1891 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1892 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1893 *norm = 0.0; 1894 v = amat->a; jj = amat->j; 1895 for (j=0; j<amat->nz; j++) { 1896 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1897 } 1898 v = bmat->a; jj = bmat->j; 1899 for (j=0; j<bmat->nz; j++) { 1900 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1901 } 1902 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1903 for (j=0; j<mat->cmap->N; j++) { 1904 if (tmp2[j] > *norm) *norm = tmp2[j]; 1905 } 1906 ierr = PetscFree(tmp);CHKERRQ(ierr); 1907 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1908 } else if (type == NORM_INFINITY) { /* max row norm */ 1909 PetscReal ntemp = 0.0; 1910 for (j=0; j<aij->A->rmap->n; j++) { 1911 v = amat->a + amat->i[j]; 1912 sum = 0.0; 1913 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1914 sum += PetscAbsScalar(*v); v++; 1915 } 1916 v = bmat->a + bmat->i[j]; 1917 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1918 sum += PetscAbsScalar(*v); v++; 1919 } 1920 if (sum > ntemp) ntemp = sum; 1921 } 1922 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1923 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatTranspose_MPIAIJ" 1930 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1931 { 1932 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1933 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1934 PetscErrorCode ierr; 1935 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1936 PetscInt cstart = A->cmap->rstart,ncol; 1937 Mat B; 1938 MatScalar *array; 1939 1940 PetscFunctionBegin; 1941 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1942 1943 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1944 ai = Aloc->i; aj = Aloc->j; 1945 bi = Bloc->i; bj = Bloc->j; 1946 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1947 PetscInt *d_nnz,*g_nnz,*o_nnz; 1948 PetscSFNode *oloc; 1949 PETSC_UNUSED PetscSF sf; 1950 1951 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1952 /* compute d_nnz for preallocation */ 1953 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1954 for (i=0; i<ai[ma]; i++) { 1955 d_nnz[aj[i]]++; 1956 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1957 } 1958 /* 
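     Each nonzero in column j of the local off-diagonal block B becomes an entry in a row of the
     transpose owned by the process holding global column garray[j] of A, so first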
compute local off-diagonal contributions */ 1959 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1960 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1961 /* map those to global */ 1962 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1963 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1964 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1965 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1966 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1967 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1968 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1969 1970 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1971 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1972 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1973 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1974 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1975 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1976 } else { 1977 B = *matout; 1978 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1979 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1980 } 1981 1982 /* copy over the A part */ 1983 array = Aloc->a; 1984 row = A->rmap->rstart; 1985 for (i=0; i<ma; i++) { 1986 ncol = ai[i+1]-ai[i]; 1987 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1988 row++; 1989 array += ncol; aj += ncol; 1990 } 1991 aj = Aloc->j; 1992 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1993 1994 /* copy over the B part */ 1995 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1996 array = Bloc->a; 1997 row = A->rmap->rstart; 1998 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1999 cols_tmp = cols; 2000 for (i=0; i<mb; i++) { 2001 ncol = bi[i+1]-bi[i]; 2002 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2003 row++; 2004 array += ncol; cols_tmp += ncol; 2005 } 2006 ierr = PetscFree(cols);CHKERRQ(ierr); 2007 2008 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2009 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2010 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2011 *matout = B; 2012 } else { 2013 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2020 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2021 { 2022 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2023 Mat a = aij->A,b = aij->B; 2024 PetscErrorCode ierr; 2025 PetscInt s1,s2,s3; 2026 2027 PetscFunctionBegin; 2028 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2029 if (rr) { 2030 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2031 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2032 /* Overlap communication with computation. 
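        The scatter of rr into the ghosted vector aij->lvec is started here; the left scaling of the
        off-diagonal block and the scaling of the diagonal block below proceed while it completes, and
        the scatter is ended just before the off-diagonal block is right-scaled.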
*/ 2033 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 } 2035 if (ll) { 2036 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2037 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2038 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2039 } 2040 /* scale the diagonal block */ 2041 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2042 2043 if (rr) { 2044 /* Do a scatter end and then right scale the off-diagonal block */ 2045 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2046 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2047 } 2048 PetscFunctionReturn(0); 2049 } 2050 2051 #undef __FUNCT__ 2052 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2053 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2054 { 2055 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2056 PetscErrorCode ierr; 2057 2058 PetscFunctionBegin; 2059 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 #undef __FUNCT__ 2064 #define __FUNCT__ "MatEqual_MPIAIJ" 2065 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2066 { 2067 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2068 Mat a,b,c,d; 2069 PetscBool flg; 2070 PetscErrorCode ierr; 2071 2072 PetscFunctionBegin; 2073 a = matA->A; b = matA->B; 2074 c = matB->A; d = matB->B; 2075 2076 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2077 if (flg) { 2078 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2079 } 2080 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatCopy_MPIAIJ" 2086 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2087 { 2088 PetscErrorCode ierr; 2089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2090 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2091 2092 PetscFunctionBegin; 2093 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2094 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2095 /* because of the column compression in the off-processor part of the matrix a->B, 2096 the number of columns in a->B and b->B may be different, hence we cannot call 2097 the MatCopy() directly on the two parts. If need be, we can provide a more 2098 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2099 then copying the submatrices */ 2100 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2101 } else { 2102 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2103 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatSetUp_MPIAIJ" 2110 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2111 { 2112 PetscErrorCode ierr; 2113 2114 PetscFunctionBegin; 2115 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 /* 2120 Computes the number of nonzeros per row needed for preallocation when X and Y 2121 have different nonzero structure. 
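    The routine below merges, row by row, the sorted column lists of X and Y after mapping them to
    global indices through xltog and yltog, counting each distinct global column once. For example,
    a row with X columns {0,3,7} and Y columns {3,5} gives nnz = 4 (the union {0,3,5,7}).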
2122 */ 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2125 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2126 { 2127 PetscInt i,j,k,nzx,nzy; 2128 2129 PetscFunctionBegin; 2130 /* Set the number of nonzeros in the new matrix */ 2131 for (i=0; i<m; i++) { 2132 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2133 nzx = xi[i+1] - xi[i]; 2134 nzy = yi[i+1] - yi[i]; 2135 nnz[i] = 0; 2136 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2137 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2138 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2139 nnz[i]++; 2140 } 2141 for (; k<nzy; k++) nnz[i]++; 2142 } 2143 PetscFunctionReturn(0); 2144 } 2145 2146 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2149 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2150 { 2151 PetscErrorCode ierr; 2152 PetscInt m = Y->rmap->N; 2153 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2154 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2155 2156 PetscFunctionBegin; 2157 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatAXPY_MPIAIJ" 2163 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2164 { 2165 PetscErrorCode ierr; 2166 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2167 PetscBLASInt bnz,one=1; 2168 Mat_SeqAIJ *x,*y; 2169 2170 PetscFunctionBegin; 2171 if (str == SAME_NONZERO_PATTERN) { 2172 PetscScalar alpha = a; 2173 x = (Mat_SeqAIJ*)xx->A->data; 2174 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2175 y = (Mat_SeqAIJ*)yy->A->data; 2176 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2177 x = (Mat_SeqAIJ*)xx->B->data; 2178 y = (Mat_SeqAIJ*)yy->B->data; 2179 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2180 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2181 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2182 } else if (str == SUBSET_NONZERO_PATTERN) { 2183 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2184 } else { 2185 Mat B; 2186 PetscInt *nnz_d,*nnz_o; 2187 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2188 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2189 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2190 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2191 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2192 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2193 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2194 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2195 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2196 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2197 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2198 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2199 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2200 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2201 } 2202 PetscFunctionReturn(0); 2203 } 2204 2205 extern 
PetscErrorCode MatConjugate_SeqAIJ(Mat); 2206 2207 #undef __FUNCT__ 2208 #define __FUNCT__ "MatConjugate_MPIAIJ" 2209 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2210 { 2211 #if defined(PETSC_USE_COMPLEX) 2212 PetscErrorCode ierr; 2213 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2214 2215 PetscFunctionBegin; 2216 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2217 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2218 #else 2219 PetscFunctionBegin; 2220 #endif 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatRealPart_MPIAIJ" 2226 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2227 { 2228 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2229 PetscErrorCode ierr; 2230 2231 PetscFunctionBegin; 2232 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2233 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2239 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2240 { 2241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2242 PetscErrorCode ierr; 2243 2244 PetscFunctionBegin; 2245 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2246 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #if defined(PETSC_HAVE_PBGL) 2251 2252 #include <boost/parallel/mpi/bsp_process_group.hpp> 2253 #include <boost/graph/distributed/ilu_default_graph.hpp> 2254 #include <boost/graph/distributed/ilu_0_block.hpp> 2255 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2256 #include <boost/graph/distributed/petsc/interface.hpp> 2257 #include <boost/multi_array.hpp> 2258 #include <boost/parallel/distributed_property_map.hpp> 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2262 /* 2263 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2264 */ 2265 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2266 { 2267 namespace petsc = boost::distributed::petsc; 2268 2269 namespace graph_dist = boost::graph::distributed; 2270 using boost::graph::distributed::ilu_default::process_group_type; 2271 using boost::graph::ilu_permuted; 2272 2273 PetscBool row_identity, col_identity; 2274 PetscContainer c; 2275 PetscInt m, n, M, N; 2276 PetscErrorCode ierr; 2277 2278 PetscFunctionBegin; 2279 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2280 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2281 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2282 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2283 2284 process_group_type pg; 2285 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2286 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2287 lgraph_type& level_graph = *lgraph_p; 2288 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2289 2290 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2291 ilu_permuted(level_graph); 2292 2293 /* put together the new matrix */ 2294 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2295 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2296 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2297 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2298 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2299 ierr = MatSetType(fact,
((PetscObject)A)->type_name);CHKERRQ(ierr); 2300 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2301 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2302 2303 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2304 ierr = PetscContainerSetPointer(c, lgraph_p); 2305 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2306 ierr = PetscContainerDestroy(&c); 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2312 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2313 { 2314 PetscFunctionBegin; 2315 PetscFunctionReturn(0); 2316 } 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatSolve_MPIAIJ" 2320 /* 2321 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2322 */ 2323 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2324 { 2325 namespace graph_dist = boost::graph::distributed; 2326 2327 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2328 lgraph_type *lgraph_p; 2329 PetscContainer c; 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2334 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2335 ierr = VecCopy(b, x);CHKERRQ(ierr); 2336 2337 PetscScalar *array_x; 2338 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2339 PetscInt sx; 2340 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2341 2342 PetscScalar *array_b; 2343 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2344 PetscInt sb; 2345 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2346 2347 lgraph_type& level_graph = *lgraph_p; 2348 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2349 2350 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2351 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2352 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2353 2354 typedef boost::iterator_property_map<array_ref_type::iterator, 2355 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2356 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2357 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2358 2359 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2360 PetscFunctionReturn(0); 2361 } 2362 #endif 2363 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2367 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2368 { 2369 PetscMPIInt rank,size; 2370 MPI_Comm comm; 2371 PetscErrorCode ierr; 2372 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2373 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2374 PetscInt *rowrange = mat->rmap->range; 2375 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2376 Mat A = aij->A,B=aij->B,C=*matredundant; 2377 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2378 PetscScalar *sbuf_a; 2379 PetscInt nzlocal=a->nz+b->nz; 2380 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2381 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2382 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2383 MatScalar *aworkA,*aworkB; 2384 PetscScalar *vals; 2385 PetscMPIInt tag1,tag2,tag3,imdex; 2386 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2387 
MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2388 MPI_Status recv_status,*send_status; 2389 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2390 PetscInt **rbuf_j=NULL; 2391 PetscScalar **rbuf_a=NULL; 2392 Mat_Redundant *redund =NULL; 2393 2394 PetscFunctionBegin; 2395 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2396 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2397 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2398 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2399 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2400 2401 if (reuse == MAT_REUSE_MATRIX) { 2402 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2403 if (subsize == 1) { 2404 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2405 redund = c->redundant; 2406 } else { 2407 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2408 redund = c->redundant; 2409 } 2410 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2411 2412 nsends = redund->nsends; 2413 nrecvs = redund->nrecvs; 2414 send_rank = redund->send_rank; 2415 recv_rank = redund->recv_rank; 2416 sbuf_nz = redund->sbuf_nz; 2417 rbuf_nz = redund->rbuf_nz; 2418 sbuf_j = redund->sbuf_j; 2419 sbuf_a = redund->sbuf_a; 2420 rbuf_j = redund->rbuf_j; 2421 rbuf_a = redund->rbuf_a; 2422 } 2423 2424 if (reuse == MAT_INITIAL_MATRIX) { 2425 PetscInt nleftover,np_subcomm; 2426 2427 /* get the destination processors' id send_rank, nsends and nrecvs */ 2428 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2429 2430 np_subcomm = size/nsubcomm; 2431 nleftover = size - nsubcomm*np_subcomm; 2432 2433 /* block of codes below is specific for INTERLACED */ 2434 /* ------------------------------------------------*/ 2435 nsends = 0; nrecvs = 0; 2436 for (i=0; i<size; i++) { 2437 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2438 send_rank[nsends++] = i; 2439 recv_rank[nrecvs++] = i; 2440 } 2441 } 2442 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2443 i = size-nleftover-1; 2444 j = 0; 2445 while (j < nsubcomm - nleftover) { 2446 send_rank[nsends++] = i; 2447 i--; j++; 2448 } 2449 } 2450 2451 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2452 for (i=0; i<nleftover; i++) { 2453 recv_rank[nrecvs++] = size-nleftover+i; 2454 } 2455 } 2456 /*----------------------------------------------*/ 2457 2458 /* allocate sbuf_j, sbuf_a */ 2459 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2460 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2461 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2462 /* 2463 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2464 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2465 */ 2466 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2467 2468 /* copy mat's local entries into the buffers */ 2469 if (reuse == MAT_INITIAL_MATRIX) { 2470 rownz_max = 0; 2471 rptr = sbuf_j; 2472 cols = sbuf_j + rend-rstart + 1; 2473 vals = sbuf_a; 2474 rptr[0] = 0; 2475 for (i=0; i<rend-rstart; i++) { 2476 row = i + rstart; 2477 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2478 ncols = nzA + nzB; 2479 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2480 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2481 /* load the column indices for this row into cols */ 2482 lwrite = 0; 2483 for (l=0; l<nzB; l++) { 2484 
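        /* off-diagonal entries whose global column lies left of the diagonal block go first, so that
           together with the two loops below each row is written in ascending global column order */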
if ((ctmp = bmap[cworkB[l]]) < cstart) { 2485 vals[lwrite] = aworkB[l]; 2486 cols[lwrite++] = ctmp; 2487 } 2488 } 2489 for (l=0; l<nzA; l++) { 2490 vals[lwrite] = aworkA[l]; 2491 cols[lwrite++] = cstart + cworkA[l]; 2492 } 2493 for (l=0; l<nzB; l++) { 2494 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2495 vals[lwrite] = aworkB[l]; 2496 cols[lwrite++] = ctmp; 2497 } 2498 } 2499 vals += ncols; 2500 cols += ncols; 2501 rptr[i+1] = rptr[i] + ncols; 2502 if (rownz_max < ncols) rownz_max = ncols; 2503 } 2504 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2505 } else { /* only copy matrix values into sbuf_a */ 2506 rptr = sbuf_j; 2507 vals = sbuf_a; 2508 rptr[0] = 0; 2509 for (i=0; i<rend-rstart; i++) { 2510 row = i + rstart; 2511 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2512 ncols = nzA + nzB; 2513 cworkB = b->j + b->i[i]; 2514 aworkA = a->a + a->i[i]; 2515 aworkB = b->a + b->i[i]; 2516 lwrite = 0; 2517 for (l=0; l<nzB; l++) { 2518 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2519 } 2520 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2521 for (l=0; l<nzB; l++) { 2522 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2523 } 2524 vals += ncols; 2525 rptr[i+1] = rptr[i] + ncols; 2526 } 2527 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2528 2529 /* send nzlocal to others, and recv other's nzlocal */ 2530 /*--------------------------------------------------*/ 2531 if (reuse == MAT_INITIAL_MATRIX) { 2532 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2533 2534 s_waits2 = s_waits3 + nsends; 2535 s_waits1 = s_waits2 + nsends; 2536 r_waits1 = s_waits1 + nsends; 2537 r_waits2 = r_waits1 + nrecvs; 2538 r_waits3 = r_waits2 + nrecvs; 2539 } else { 2540 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2541 2542 r_waits3 = s_waits3 + nsends; 2543 } 2544 2545 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2546 if (reuse == MAT_INITIAL_MATRIX) { 2547 /* get new tags to keep the communication clean */ 2548 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2549 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2550 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2551 2552 /* post receives of other's nzlocal */ 2553 for (i=0; i<nrecvs; i++) { 2554 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2555 } 2556 /* send nzlocal to others */ 2557 for (i=0; i<nsends; i++) { 2558 sbuf_nz[i] = nzlocal; 2559 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2560 } 2561 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2562 count = nrecvs; 2563 while (count) { 2564 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2565 2566 recv_rank[imdex] = recv_status.MPI_SOURCE; 2567 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2568 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2569 2570 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2571 2572 rbuf_nz[imdex] += i + 2; 2573 2574 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2575 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2576 count--; 2577 } 
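  /* The exchange above and below is a three-stage handshake: the local nonzero count nzlocal (tag1)
     and the packed row pointers plus column indices in sbuf_j (tag2) are sent only for
     MAT_INITIAL_MATRIX, while the numerical values in sbuf_a (tag3) are exchanged on every call. */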
2578 /* wait on sends of nzlocal */ 2579 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2580 /* send mat->i,j to others, and recv from other's */ 2581 /*------------------------------------------------*/ 2582 for (i=0; i<nsends; i++) { 2583 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2584 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2585 } 2586 /* wait on receives of mat->i,j */ 2587 /*------------------------------*/ 2588 count = nrecvs; 2589 while (count) { 2590 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2591 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2592 count--; 2593 } 2594 /* wait on sends of mat->i,j */ 2595 /*---------------------------*/ 2596 if (nsends) { 2597 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2598 } 2599 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2600 2601 /* post receives, send and receive mat->a */ 2602 /*----------------------------------------*/ 2603 for (imdex=0; imdex<nrecvs; imdex++) { 2604 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2605 } 2606 for (i=0; i<nsends; i++) { 2607 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2608 } 2609 count = nrecvs; 2610 while (count) { 2611 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2612 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2613 count--; 2614 } 2615 if (nsends) { 2616 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2617 } 2618 2619 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2620 2621 /* create redundant matrix */ 2622 /*-------------------------*/ 2623 if (reuse == MAT_INITIAL_MATRIX) { 2624 const PetscInt *range; 2625 PetscInt rstart_sub,rend_sub,mloc_sub; 2626 2627 /* compute rownz_max for preallocation */ 2628 for (imdex=0; imdex<nrecvs; imdex++) { 2629 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2630 rptr = rbuf_j[imdex]; 2631 for (i=0; i<j; i++) { 2632 ncols = rptr[i+1] - rptr[i]; 2633 if (rownz_max < ncols) rownz_max = ncols; 2634 } 2635 } 2636 2637 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2638 2639 /* get local size of redundant matrix 2640 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
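        For PETSC_SUBCOMM_INTERLACED, parent rank i has subrank i/nsubcomm, so subrank r of each
        subcommunicator collects the rows owned by parent ranks nsubcomm*r .. nsubcomm*(r+1)-1;
        that is what range[nsubcomm*subrank] .. range[nsubcomm*(subrank+1)] computes below.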
*/ 2641 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2642 rstart_sub = range[nsubcomm*subrank]; 2643 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2644 rend_sub = range[nsubcomm*(subrank+1)]; 2645 } else { 2646 rend_sub = mat->rmap->N; 2647 } 2648 mloc_sub = rend_sub - rstart_sub; 2649 2650 if (M == N) { 2651 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2652 } else { /* non-square matrix */ 2653 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2654 } 2655 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2656 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2657 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2658 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2659 } else { 2660 C = *matredundant; 2661 } 2662 2663 /* insert local matrix entries */ 2664 rptr = sbuf_j; 2665 cols = sbuf_j + rend-rstart + 1; 2666 vals = sbuf_a; 2667 for (i=0; i<rend-rstart; i++) { 2668 row = i + rstart; 2669 ncols = rptr[i+1] - rptr[i]; 2670 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2671 vals += ncols; 2672 cols += ncols; 2673 } 2674 /* insert received matrix entries */ 2675 for (imdex=0; imdex<nrecvs; imdex++) { 2676 rstart = rowrange[recv_rank[imdex]]; 2677 rend = rowrange[recv_rank[imdex]+1]; 2678 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2679 rptr = rbuf_j[imdex]; 2680 cols = rbuf_j[imdex] + rend-rstart + 1; 2681 vals = rbuf_a[imdex]; 2682 for (i=0; i<rend-rstart; i++) { 2683 row = i + rstart; 2684 ncols = rptr[i+1] - rptr[i]; 2685 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2686 vals += ncols; 2687 cols += ncols; 2688 } 2689 } 2690 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2691 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2692 2693 if (reuse == MAT_INITIAL_MATRIX) { 2694 *matredundant = C; 2695 2696 /* create a supporting struct and attach it to C for reuse */ 2697 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2698 if (subsize == 1) { 2699 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2700 c->redundant = redund; 2701 } else { 2702 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2703 c->redundant = redund; 2704 } 2705 2706 redund->nzlocal = nzlocal; 2707 redund->nsends = nsends; 2708 redund->nrecvs = nrecvs; 2709 redund->send_rank = send_rank; 2710 redund->recv_rank = recv_rank; 2711 redund->sbuf_nz = sbuf_nz; 2712 redund->rbuf_nz = rbuf_nz; 2713 redund->sbuf_j = sbuf_j; 2714 redund->sbuf_a = sbuf_a; 2715 redund->rbuf_j = rbuf_j; 2716 redund->rbuf_a = rbuf_a; 2717 redund->psubcomm = NULL; 2718 } 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2724 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2725 { 2726 PetscErrorCode ierr; 2727 MPI_Comm comm; 2728 PetscMPIInt size,subsize; 2729 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2730 Mat_Redundant *redund=NULL; 2731 PetscSubcomm psubcomm=NULL; 2732 MPI_Comm subcomm_in=subcomm; 2733 Mat *matseq; 2734 IS isrow,iscol; 2735 2736 PetscFunctionBegin; 2737 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2738 if (reuse == MAT_INITIAL_MATRIX) { 2739 /* create psubcomm, then get subcomm */ 2740 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2741 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2742 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2743 2744 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2745 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2746 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2747 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2748 subcomm = psubcomm->comm; 2749 } else { /* retrieve psubcomm and subcomm */ 2750 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2751 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2752 if (subsize == 1) { 2753 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2754 redund = c->redundant; 2755 } else { 2756 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2757 redund = c->redundant; 2758 } 2759 psubcomm = redund->psubcomm; 2760 } 2761 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2762 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2763 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2764 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2765 if (subsize == 1) { 2766 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2767 c->redundant->psubcomm = psubcomm; 2768 } else { 2769 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2770 c->redundant->psubcomm = psubcomm ; 2771 } 2772 } 2773 PetscFunctionReturn(0); 2774 } 2775 } 2776 2777 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2778 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2779 if (reuse == MAT_INITIAL_MATRIX) { 2780 /* create a local sequential matrix matseq[0] */ 2781 mloc_sub = PETSC_DECIDE; 2782 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2783 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2784 rstart = rend - mloc_sub; 2785 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2786 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2787 } else { /* reuse == MAT_REUSE_MATRIX */ 2788 if (subsize == 1) { 2789 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2790 redund = c->redundant; 2791 } else { 2792 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2793 redund = c->redundant; 2794 } 2795 2796 isrow = redund->isrow; 2797 iscol = redund->iscol; 2798 matseq = redund->matseq; 2799 } 2800 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2801 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2802 2803 if (reuse == MAT_INITIAL_MATRIX) { 2804 /* create a supporting struct and attach it to C for reuse */ 2805 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2806 if (subsize == 1) { 2807 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2808 c->redundant = redund; 2809 } else { 2810 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2811 c->redundant = redund; 2812 } 2813 redund->isrow = isrow; 2814 redund->iscol = iscol; 2815 redund->matseq = matseq; 2816 redund->psubcomm = psubcomm; 2817 } 2818 PetscFunctionReturn(0); 2819 } 2820 2821 #undef __FUNCT__ 2822 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2823 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2824 { 2825 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2826 PetscErrorCode ierr; 2827 PetscInt i,*idxb = 0; 2828 PetscScalar *va,*vb; 2829 Vec vtmp; 2830 2831 
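  /* The row-wise maximum in absolute value is taken over the diagonal block first; the off-diagonal
     block is then evaluated into a temporary sequential vector and the larger entry wins, with winning
     off-diagonal indices translated to global column numbers through garray. */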
PetscFunctionBegin; 2832 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2833 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2834 if (idx) { 2835 for (i=0; i<A->rmap->n; i++) { 2836 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2837 } 2838 } 2839 2840 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2841 if (idx) { 2842 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2843 } 2844 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2845 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2846 2847 for (i=0; i<A->rmap->n; i++) { 2848 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2849 va[i] = vb[i]; 2850 if (idx) idx[i] = a->garray[idxb[i]]; 2851 } 2852 } 2853 2854 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2855 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2856 ierr = PetscFree(idxb);CHKERRQ(ierr); 2857 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2863 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2864 { 2865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2866 PetscErrorCode ierr; 2867 PetscInt i,*idxb = 0; 2868 PetscScalar *va,*vb; 2869 Vec vtmp; 2870 2871 PetscFunctionBegin; 2872 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2873 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2874 if (idx) { 2875 for (i=0; i<A->rmap->n; i++) { 2876 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2877 } 2878 } 2879 2880 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2881 if (idx) { 2882 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2883 } 2884 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2885 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2886 2887 for (i=0; i<A->rmap->n; i++) { 2888 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2889 va[i] = vb[i]; 2890 if (idx) idx[i] = a->garray[idxb[i]]; 2891 } 2892 } 2893 2894 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2895 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2896 ierr = PetscFree(idxb);CHKERRQ(ierr); 2897 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2898 PetscFunctionReturn(0); 2899 } 2900 2901 #undef __FUNCT__ 2902 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2903 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2904 { 2905 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2906 PetscInt n = A->rmap->n; 2907 PetscInt cstart = A->cmap->rstart; 2908 PetscInt *cmap = mat->garray; 2909 PetscInt *diagIdx, *offdiagIdx; 2910 Vec diagV, offdiagV; 2911 PetscScalar *a, *diagA, *offdiagA; 2912 PetscInt r; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2917 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2918 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2919 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2920 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2921 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2922 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2923 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2924 for (r = 0; r < n; ++r) { 2925 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2926 a[r] = diagA[r]; 2927 idx[r] = cstart + diagIdx[r]; 2928 } else { 2929 a[r] = offdiagA[r]; 2930 idx[r] = cmap[offdiagIdx[r]]; 2931 } 2932 } 2933 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2934 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2935 ierr = VecRestoreArray(offdiagV,
&offdiagA);CHKERRQ(ierr); 2936 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2937 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2938 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2939 PetscFunctionReturn(0); 2940 } 2941 2942 #undef __FUNCT__ 2943 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2944 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2945 { 2946 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2947 PetscInt n = A->rmap->n; 2948 PetscInt cstart = A->cmap->rstart; 2949 PetscInt *cmap = mat->garray; 2950 PetscInt *diagIdx, *offdiagIdx; 2951 Vec diagV, offdiagV; 2952 PetscScalar *a, *diagA, *offdiagA; 2953 PetscInt r; 2954 PetscErrorCode ierr; 2955 2956 PetscFunctionBegin; 2957 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2958 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2959 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2960 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2961 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2962 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2963 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2964 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2965 for (r = 0; r < n; ++r) { 2966 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2967 a[r] = diagA[r]; 2968 idx[r] = cstart + diagIdx[r]; 2969 } else { 2970 a[r] = offdiagA[r]; 2971 idx[r] = cmap[offdiagIdx[r]]; 2972 } 2973 } 2974 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2975 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2976 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2977 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2978 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2979 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2985 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2986 { 2987 PetscErrorCode ierr; 2988 Mat *dummy; 2989 2990 PetscFunctionBegin; 2991 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2992 *newmat = *dummy; 2993 ierr = PetscFree(dummy);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 2997 #undef __FUNCT__ 2998 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2999 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3000 { 3001 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3002 PetscErrorCode ierr; 3003 3004 PetscFunctionBegin; 3005 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3011 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3012 { 3013 PetscErrorCode ierr; 3014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3015 3016 PetscFunctionBegin; 3017 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3018 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3019 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3020 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3021 PetscFunctionReturn(0); 3022 } 3023 3024 /* -------------------------------------------------------------------*/ 3025 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3026 MatGetRow_MPIAIJ, 3027 MatRestoreRow_MPIAIJ, 3028 MatMult_MPIAIJ, 3029 /* 4*/ MatMultAdd_MPIAIJ, 3030 MatMultTranspose_MPIAIJ, 3031 MatMultTransposeAdd_MPIAIJ, 3032 #if defined(PETSC_HAVE_PBGL) 3033 MatSolve_MPIAIJ, 3034 #else 3035 0, 3036 #endif 3037 0, 3038 
0, 3039 /*10*/ 0, 3040 0, 3041 0, 3042 MatSOR_MPIAIJ, 3043 MatTranspose_MPIAIJ, 3044 /*15*/ MatGetInfo_MPIAIJ, 3045 MatEqual_MPIAIJ, 3046 MatGetDiagonal_MPIAIJ, 3047 MatDiagonalScale_MPIAIJ, 3048 MatNorm_MPIAIJ, 3049 /*20*/ MatAssemblyBegin_MPIAIJ, 3050 MatAssemblyEnd_MPIAIJ, 3051 MatSetOption_MPIAIJ, 3052 MatZeroEntries_MPIAIJ, 3053 /*24*/ MatZeroRows_MPIAIJ, 3054 0, 3055 #if defined(PETSC_HAVE_PBGL) 3056 0, 3057 #else 3058 0, 3059 #endif 3060 0, 3061 0, 3062 /*29*/ MatSetUp_MPIAIJ, 3063 #if defined(PETSC_HAVE_PBGL) 3064 0, 3065 #else 3066 0, 3067 #endif 3068 0, 3069 0, 3070 0, 3071 /*34*/ MatDuplicate_MPIAIJ, 3072 0, 3073 0, 3074 0, 3075 0, 3076 /*39*/ MatAXPY_MPIAIJ, 3077 MatGetSubMatrices_MPIAIJ, 3078 MatIncreaseOverlap_MPIAIJ, 3079 MatGetValues_MPIAIJ, 3080 MatCopy_MPIAIJ, 3081 /*44*/ MatGetRowMax_MPIAIJ, 3082 MatScale_MPIAIJ, 3083 0, 3084 0, 3085 MatZeroRowsColumns_MPIAIJ, 3086 /*49*/ MatSetRandom_MPIAIJ, 3087 0, 3088 0, 3089 0, 3090 0, 3091 /*54*/ MatFDColoringCreate_MPIXAIJ, 3092 0, 3093 MatSetUnfactored_MPIAIJ, 3094 MatPermute_MPIAIJ, 3095 0, 3096 /*59*/ MatGetSubMatrix_MPIAIJ, 3097 MatDestroy_MPIAIJ, 3098 MatView_MPIAIJ, 3099 0, 3100 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3101 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3102 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3103 0, 3104 0, 3105 0, 3106 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3107 MatGetRowMinAbs_MPIAIJ, 3108 0, 3109 MatSetColoring_MPIAIJ, 3110 0, 3111 MatSetValuesAdifor_MPIAIJ, 3112 /*75*/ MatFDColoringApply_AIJ, 3113 0, 3114 0, 3115 0, 3116 MatFindZeroDiagonals_MPIAIJ, 3117 /*80*/ 0, 3118 0, 3119 0, 3120 /*83*/ MatLoad_MPIAIJ, 3121 0, 3122 0, 3123 0, 3124 0, 3125 0, 3126 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3127 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3128 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3129 MatPtAP_MPIAIJ_MPIAIJ, 3130 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3131 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3132 0, 3133 0, 3134 0, 3135 0, 3136 /*99*/ 0, 3137 0, 3138 0, 3139 MatConjugate_MPIAIJ, 3140 0, 3141 /*104*/MatSetValuesRow_MPIAIJ, 3142 MatRealPart_MPIAIJ, 3143 MatImaginaryPart_MPIAIJ, 3144 0, 3145 0, 3146 /*109*/0, 3147 MatGetRedundantMatrix_MPIAIJ, 3148 MatGetRowMin_MPIAIJ, 3149 0, 3150 0, 3151 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3152 0, 3153 0, 3154 0, 3155 0, 3156 /*119*/0, 3157 0, 3158 0, 3159 0, 3160 MatGetMultiProcBlock_MPIAIJ, 3161 /*124*/MatFindNonzeroRows_MPIAIJ, 3162 MatGetColumnNorms_MPIAIJ, 3163 MatInvertBlockDiagonal_MPIAIJ, 3164 0, 3165 MatGetSubMatricesParallel_MPIAIJ, 3166 /*129*/0, 3167 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3168 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3169 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3170 0, 3171 /*134*/0, 3172 0, 3173 0, 3174 0, 3175 0, 3176 /*139*/0, 3177 0, 3178 0, 3179 MatFDColoringSetUp_MPIXAIJ 3180 }; 3181 3182 /* ----------------------------------------------------------------------------------------*/ 3183 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3186 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3187 { 3188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3189 PetscErrorCode ierr; 3190 3191 PetscFunctionBegin; 3192 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3193 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3194 PetscFunctionReturn(0); 3195 } 3196 3197 #undef __FUNCT__ 3198 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3199 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3200 { 3201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3202 PetscErrorCode ierr; 3203 3204 PetscFunctionBegin; 3205 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3206 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 3207 PetscFunctionReturn(0); 3208 } 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3212 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3213 { 3214 Mat_MPIAIJ *b; 3215 PetscErrorCode ierr; 3216 3217 PetscFunctionBegin; 3218 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3219 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3220 b = (Mat_MPIAIJ*)B->data; 3221 3222 if (!B->preallocated) { 3223 /* Explicitly create 2 MATSEQAIJ matrices. */ 3224 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3225 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3226 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3227 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3228 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3229 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3230 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3231 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3232 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3233 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3234 } 3235 3236 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3237 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3238 B->preallocated = PETSC_TRUE; 3239 PetscFunctionReturn(0); 3240 } 3241 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3244 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3245 { 3246 Mat mat; 3247 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3248 PetscErrorCode ierr; 3249 3250 PetscFunctionBegin; 3251 *newmat = 0; 3252 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3253 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3254 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3255 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3256 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3257 a = (Mat_MPIAIJ*)mat->data; 3258 3259 mat->factortype = matin->factortype; 3260 mat->assembled = PETSC_TRUE; 3261 mat->insertmode = NOT_SET_VALUES; 3262 mat->preallocated = PETSC_TRUE; 3263 3264 a->size = oldmat->size; 3265 a->rank = oldmat->rank; 3266 a->donotstash = oldmat->donotstash; 3267 a->roworiented = oldmat->roworiented; 3268 a->rowindices = 0; 3269 a->rowvalues = 0; 3270 a->getrowactive = PETSC_FALSE; 3271 3272 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3273 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3274 3275 if (oldmat->colmap) { 3276 #if defined(PETSC_USE_CTABLE) 3277 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3278 #else 3279 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3280 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3281 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3282 #endif 3283 } else a->colmap = 0; 3284 if (oldmat->garray) { 3285 PetscInt len; 3286 len = oldmat->B->cmap->n; 3287 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3288 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3289 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3290 } else a->garray = 0; 3291 3292 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3293 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3294 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3295 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3296 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3297 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3298 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3299 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3300 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3301 *newmat = mat; 3302 PetscFunctionReturn(0); 3303 } 3304 3305 3306 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "MatLoad_MPIAIJ" 3309 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3310 { 3311 PetscScalar *vals,*svals; 3312 MPI_Comm comm; 3313 PetscErrorCode ierr; 3314 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3315 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3316 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3317 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3318 PetscInt cend,cstart,n,*rowners,sizesset=1; 3319 int fd; 3320 PetscInt bs = newMat->rmap->bs; 3321 3322 PetscFunctionBegin; 3323 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3324 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3325 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3326 if (!rank) { 3327 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3328 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3329 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3330 } 3331 3332 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3333 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3334 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3335 if (bs < 0) bs = 1; 3336 3337 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3338 3339 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3340 M = header[1]; N = header[2]; 3341 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3342 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3343 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3344 3345 /* If global sizes are set, check if they are consistent with that given in the file */ 3346 if (sizesset) { 3347 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3348 } 3349 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3350 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3351 3352 /* determine ownership of all (block) rows */ 3353 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3354 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3355 else m = 
newMat->rmap->n; /* Set by user */ 3356 3357 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3358 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3359 3360 /* First process needs enough room for process with most rows */ 3361 if (!rank) { 3362 mmax = rowners[1]; 3363 for (i=2; i<=size; i++) { 3364 mmax = PetscMax(mmax, rowners[i]); 3365 } 3366 } else mmax = -1; /* unused, but compilers complain */ 3367 3368 rowners[0] = 0; 3369 for (i=2; i<=size; i++) { 3370 rowners[i] += rowners[i-1]; 3371 } 3372 rstart = rowners[rank]; 3373 rend = rowners[rank+1]; 3374 3375 /* distribute row lengths to all processors */ 3376 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3377 if (!rank) { 3378 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3379 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3380 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3381 for (j=0; j<m; j++) { 3382 procsnz[0] += ourlens[j]; 3383 } 3384 for (i=1; i<size; i++) { 3385 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3386 /* calculate the number of nonzeros on each processor */ 3387 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3388 procsnz[i] += rowlengths[j]; 3389 } 3390 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3391 } 3392 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3393 } else { 3394 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3395 } 3396 3397 if (!rank) { 3398 /* determine max buffer needed and allocate it */ 3399 maxnz = 0; 3400 for (i=0; i<size; i++) { 3401 maxnz = PetscMax(maxnz,procsnz[i]); 3402 } 3403 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3404 3405 /* read in my part of the matrix column indices */ 3406 nz = procsnz[0]; 3407 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3408 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3409 3410 /* read in every one elses and ship off */ 3411 for (i=1; i<size; i++) { 3412 nz = procsnz[i]; 3413 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3414 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3415 } 3416 ierr = PetscFree(cols);CHKERRQ(ierr); 3417 } else { 3418 /* determine buffer space needed for message */ 3419 nz = 0; 3420 for (i=0; i<m; i++) { 3421 nz += ourlens[i]; 3422 } 3423 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3424 3425 /* receive message of column indices*/ 3426 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3427 } 3428 3429 /* determine column ownership if matrix is not square */ 3430 if (N != M) { 3431 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3432 else n = newMat->cmap->n; 3433 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3434 cstart = cend - n; 3435 } else { 3436 cstart = rstart; 3437 cend = rend; 3438 n = cend - cstart; 3439 } 3440 3441 /* loop over local rows, determining number of off diagonal entries */ 3442 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3443 jj = 0; 3444 for (i=0; i<m; i++) { 3445 for (j=0; j<ourlens[i]; j++) { 3446 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3447 jj++; 3448 } 3449 } 3450 3451 for (i=0; i<m; i++) { 3452 ourlens[i] -= offlens[i]; 3453 } 3454 if (!sizesset) { 3455 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3456 } 3457 3458 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3459 3460 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3461 3462 for (i=0; i<m; i++) { 3463 
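    /* ourlens[] was reduced above to the diagonal-block counts needed by MatMPIAIJSetPreallocation();
       restore the full per-row lengths so the column/value buffers are stepped through row by row below */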
ourlens[i] += offlens[i]; 3464 } 3465 3466 if (!rank) { 3467 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3468 3469 /* read in my part of the matrix numerical values */ 3470 nz = procsnz[0]; 3471 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3472 3473 /* insert into matrix */ 3474 jj = rstart; 3475 smycols = mycols; 3476 svals = vals; 3477 for (i=0; i<m; i++) { 3478 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3479 smycols += ourlens[i]; 3480 svals += ourlens[i]; 3481 jj++; 3482 } 3483 3484 /* read in other processors and ship out */ 3485 for (i=1; i<size; i++) { 3486 nz = procsnz[i]; 3487 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3488 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3489 } 3490 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3491 } else { 3492 /* receive numeric values */ 3493 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3494 3495 /* receive message of values*/ 3496 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3497 3498 /* insert into matrix */ 3499 jj = rstart; 3500 smycols = mycols; 3501 svals = vals; 3502 for (i=0; i<m; i++) { 3503 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3504 smycols += ourlens[i]; 3505 svals += ourlens[i]; 3506 jj++; 3507 } 3508 } 3509 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3510 ierr = PetscFree(vals);CHKERRQ(ierr); 3511 ierr = PetscFree(mycols);CHKERRQ(ierr); 3512 ierr = PetscFree(rowners);CHKERRQ(ierr); 3513 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3514 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3515 PetscFunctionReturn(0); 3516 } 3517 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3520 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3521 { 3522 PetscErrorCode ierr; 3523 IS iscol_local; 3524 PetscInt csize; 3525 3526 PetscFunctionBegin; 3527 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3528 if (call == MAT_REUSE_MATRIX) { 3529 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3530 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3531 } else { 3532 PetscInt cbs; 3533 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3534 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3535 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3536 } 3537 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3538 if (call == MAT_INITIAL_MATRIX) { 3539 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3540 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3541 } 3542 PetscFunctionReturn(0); 3543 } 3544 3545 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3548 /* 3549 Not great since it makes two copies of the submatrix, first an SeqAIJ 3550 in local and then by concatenating the local matrices the end result. 3551 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3552 3553 Note: This requires a sequential iscol with all indices. 
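          (MatGetSubMatrix_MPIAIJ() above builds such an index set from the distributed iscol with ISAllGather().)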
3554 */ 3555 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3556 { 3557 PetscErrorCode ierr; 3558 PetscMPIInt rank,size; 3559 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3560 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3561 PetscBool allcolumns, colflag; 3562 Mat M,Mreuse; 3563 MatScalar *vwork,*aa; 3564 MPI_Comm comm; 3565 Mat_SeqAIJ *aij; 3566 3567 PetscFunctionBegin; 3568 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3570 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3571 3572 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3573 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3574 if (colflag && ncol == mat->cmap->N) { 3575 allcolumns = PETSC_TRUE; 3576 } else { 3577 allcolumns = PETSC_FALSE; 3578 } 3579 if (call == MAT_REUSE_MATRIX) { 3580 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3581 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3582 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3583 } else { 3584 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3585 } 3586 3587 /* 3588 m - number of local rows 3589 n - number of columns (same on all processors) 3590 rstart - first row in new global matrix generated 3591 */ 3592 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3593 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3594 if (call == MAT_INITIAL_MATRIX) { 3595 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3596 ii = aij->i; 3597 jj = aij->j; 3598 3599 /* 3600 Determine the number of non-zeros in the diagonal and off-diagonal 3601 portions of the matrix in order to do correct preallocation 3602 */ 3603 3604 /* first get start and end of "diagonal" columns */ 3605 if (csize == PETSC_DECIDE) { 3606 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3607 if (mglobal == n) { /* square matrix */ 3608 nlocal = m; 3609 } else { 3610 nlocal = n/size + ((n % size) > rank); 3611 } 3612 } else { 3613 nlocal = csize; 3614 } 3615 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3616 rstart = rend - nlocal; 3617 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3618 3619 /* next, compute all the lengths */ 3620 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3621 olens = dlens + m; 3622 for (i=0; i<m; i++) { 3623 jend = ii[i+1] - ii[i]; 3624 olen = 0; 3625 dlen = 0; 3626 for (j=0; j<jend; j++) { 3627 if (*jj < rstart || *jj >= rend) olen++; 3628 else dlen++; 3629 jj++; 3630 } 3631 olens[i] = olen; 3632 dlens[i] = dlen; 3633 } 3634 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3635 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3636 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3637 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3638 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3639 ierr = PetscFree(dlens);CHKERRQ(ierr); 3640 } else { 3641 PetscInt ml,nl; 3642 3643 M = *newmat; 3644 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3645 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3646 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3647 /* 3648 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3649 rather than the slower MatSetValues(). 3650 */ 3651 M->was_assembled = PETSC_TRUE; 3652 M->assembled = PETSC_FALSE; 3653 } 3654 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3655 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3656 ii = aij->i; 3657 jj = aij->j; 3658 aa = aij->a; 3659 for (i=0; i<m; i++) { 3660 row = rstart + i; 3661 nz = ii[i+1] - ii[i]; 3662 cwork = jj; jj += nz; 3663 vwork = aa; aa += nz; 3664 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3665 } 3666 3667 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3668 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3669 *newmat = M; 3670 3671 /* save submatrix used in processor for next request */ 3672 if (call == MAT_INITIAL_MATRIX) { 3673 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3674 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3675 } 3676 PetscFunctionReturn(0); 3677 } 3678 3679 #undef __FUNCT__ 3680 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3681 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3682 { 3683 PetscInt m,cstart, cend,j,nnz,i,d; 3684 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3685 const PetscInt *JJ; 3686 PetscScalar *values; 3687 PetscErrorCode ierr; 3688 3689 PetscFunctionBegin; 3690 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3691 3692 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3693 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3694 m = B->rmap->n; 3695 cstart = B->cmap->rstart; 3696 cend = B->cmap->rend; 3697 rstart = B->rmap->rstart; 3698 3699 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3700 3701 #if defined(PETSC_USE_DEBUGGING) 3702 for (i=0; i<m; i++) { 3703 nnz = Ii[i+1]- Ii[i]; 3704 JJ = J + Ii[i]; 3705 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3706 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3707 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3708 } 3709 #endif 3710 3711 for (i=0; i<m; i++) { 3712 nnz = Ii[i+1]- Ii[i]; 3713 JJ = J + Ii[i]; 3714 nnz_max = PetscMax(nnz_max,nnz); 3715 d = 0; 3716 for (j=0; j<nnz; j++) { 3717 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3718 } 3719 d_nnz[i] = d; 3720 o_nnz[i] = nnz - d; 3721 } 3722 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3723 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3724 3725 if (v) values = (PetscScalar*)v; 3726 else { 3727 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3728 } 3729 3730 for (i=0; i<m; i++) { 3731 ii = i + rstart; 3732 nnz = Ii[i+1]- Ii[i]; 3733 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3734 } 3735 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3736 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3737 3738 if (!v) { 3739 ierr = PetscFree(values);CHKERRQ(ierr); 3740 } 3741 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3742 PetscFunctionReturn(0); 3743 } 3744 3745 #undef __FUNCT__ 3746 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3747 /*@ 3748 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3749 (the default parallel PETSc format). 3750 3751 Collective on MPI_Comm 3752 3753 Input Parameters: 3754 + B - the matrix 3755 . i - the indices into j for the start of each local row (starts with zero) 3756 . j - the column indices for each local row (starts with zero) 3757 - v - optional values in the matrix 3758 3759 Level: developer 3760 3761 Notes: 3762 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3763 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3764 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3765 3766 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3767 3768 The format which is used for the sparse matrix input, is equivalent to a 3769 row-major ordering.. i.e for the following matrix, the input data expected is 3770 as shown: 3771 3772 1 0 0 3773 2 0 3 P0 3774 ------- 3775 4 5 6 P1 3776 3777 Process0 [P0]: rows_owned=[0,1] 3778 i = {0,1,3} [size = nrow+1 = 2+1] 3779 j = {0,0,2} [size = nz = 6] 3780 v = {1,2,3} [size = nz = 6] 3781 3782 Process1 [P1]: rows_owned=[2] 3783 i = {0,3} [size = nrow+1 = 1+1] 3784 j = {0,1,2} [size = nz = 6] 3785 v = {4,5,6} [size = nz = 6] 3786 3787 .keywords: matrix, aij, compressed row, sparse, parallel 3788 3789 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3790 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3791 @*/ 3792 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3793 { 3794 PetscErrorCode ierr; 3795 3796 PetscFunctionBegin; 3797 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3798 PetscFunctionReturn(0); 3799 } 3800 3801 #undef __FUNCT__ 3802 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3803 /*@C 3804 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3805 (the default parallel PETSc format). For good matrix assembly performance 3806 the user should preallocate the matrix storage by setting the parameters 3807 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3808 performance can be increased by more than a factor of 50. 3809 3810 Collective on MPI_Comm 3811 3812 Input Parameters: 3813 + B - the matrix 3814 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3815 (same value is used for all local rows) 3816 . d_nnz - array containing the number of nonzeros in the various rows of the 3817 DIAGONAL portion of the local submatrix (possibly different for each row) 3818 or NULL, if d_nz is used to specify the nonzero structure. 3819 The size of this array is equal to the number of local rows, i.e 'm'. 
3820 For matrices that will be factored, you must leave room for (and set) 3821 the diagonal entry even if it is zero. 3822 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3823 submatrix (same value is used for all local rows). 3824 - o_nnz - array containing the number of nonzeros in the various rows of the 3825 OFF-DIAGONAL portion of the local submatrix (possibly different for 3826 each row) or NULL, if o_nz is used to specify the nonzero 3827 structure. The size of this array is equal to the number 3828 of local rows, i.e 'm'. 3829 3830 If the *_nnz parameter is given then the *_nz parameter is ignored 3831 3832 The AIJ format (also called the Yale sparse matrix format or 3833 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3834 storage. The stored row and column indices begin with zero. 3835 See Users-Manual: ch_mat for details. 3836 3837 The parallel matrix is partitioned such that the first m0 rows belong to 3838 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3839 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3840 3841 The DIAGONAL portion of the local submatrix of a processor can be defined 3842 as the submatrix which is obtained by extraction the part corresponding to 3843 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3844 first row that belongs to the processor, r2 is the last row belonging to 3845 the this processor, and c1-c2 is range of indices of the local part of a 3846 vector suitable for applying the matrix to. This is an mxn matrix. In the 3847 common case of a square matrix, the row and column ranges are the same and 3848 the DIAGONAL part is also square. The remaining portion of the local 3849 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3850 3851 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3852 3853 You can call MatGetInfo() to get information on how effective the preallocation was; 3854 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3855 You can also run with the option -info and look for messages with the string 3856 malloc in them to see if additional memory allocation was needed. 3857 3858 Example usage: 3859 3860 Consider the following 8x8 matrix with 34 non-zero values, that is 3861 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3862 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3863 as follows: 3864 3865 .vb 3866 1 2 0 | 0 3 0 | 0 4 3867 Proc0 0 5 6 | 7 0 0 | 8 0 3868 9 0 10 | 11 0 0 | 12 0 3869 ------------------------------------- 3870 13 0 14 | 15 16 17 | 0 0 3871 Proc1 0 18 0 | 19 20 21 | 0 0 3872 0 0 0 | 22 23 0 | 24 0 3873 ------------------------------------- 3874 Proc2 25 26 27 | 0 0 28 | 29 0 3875 30 0 0 | 31 32 33 | 0 34 3876 .ve 3877 3878 This can be represented as a collection of submatrices as: 3879 3880 .vb 3881 A B C 3882 D E F 3883 G H I 3884 .ve 3885 3886 Where the submatrices A,B,C are owned by proc0, D,E,F are 3887 owned by proc1, G,H,I are owned by proc2. 3888 3889 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3890 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3891 The 'M','N' parameters are 8,8, and have the same values on all procs. 3892 3893 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3894 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3895 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3896 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3897 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3898 matrix, ans [DF] as another SeqAIJ matrix. 3899 3900 When d_nz, o_nz parameters are specified, d_nz storage elements are 3901 allocated for every row of the local diagonal submatrix, and o_nz 3902 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3903 One way to choose d_nz and o_nz is to use the max nonzerors per local 3904 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3905 In this case, the values of d_nz,o_nz are: 3906 .vb 3907 proc0 : dnz = 2, o_nz = 2 3908 proc1 : dnz = 3, o_nz = 2 3909 proc2 : dnz = 1, o_nz = 4 3910 .ve 3911 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3912 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3913 for proc3. i.e we are using 12+15+10=37 storage locations to store 3914 34 values. 3915 3916 When d_nnz, o_nnz parameters are specified, the storage is specified 3917 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3918 In the above case the values for d_nnz,o_nnz are: 3919 .vb 3920 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3921 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3922 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3923 .ve 3924 Here the space allocated is sum of all the above values i.e 34, and 3925 hence pre-allocation is perfect. 3926 3927 Level: intermediate 3928 3929 .keywords: matrix, aij, compressed row, sparse, parallel 3930 3931 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3932 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3933 @*/ 3934 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3935 { 3936 PetscErrorCode ierr; 3937 3938 PetscFunctionBegin; 3939 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3940 PetscValidType(B,1); 3941 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3942 PetscFunctionReturn(0); 3943 } 3944 3945 #undef __FUNCT__ 3946 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3947 /*@ 3948 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3949 CSR format the local rows. 3950 3951 Collective on MPI_Comm 3952 3953 Input Parameters: 3954 + comm - MPI communicator 3955 . m - number of local rows (Cannot be PETSC_DECIDE) 3956 . n - This value should be the same as the local size used in creating the 3957 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3958 calculated if N is given) For square matrices n is almost always m. 3959 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3960 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3961 . i - row indices 3962 . j - column indices 3963 - a - matrix values 3964 3965 Output Parameter: 3966 . mat - the matrix 3967 3968 Level: intermediate 3969 3970 Notes: 3971 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3972 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3973 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3974 3975 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
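   A minimal call sketch (illustrative only; comm, m, N and the local CSR arrays i, j, a are
   placeholders supplied by the caller, laid out per process as in the example below):
.vb
     Mat A;
     MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);
.ve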
3976 3977 The format which is used for the sparse matrix input, is equivalent to a 3978 row-major ordering.. i.e for the following matrix, the input data expected is 3979 as shown: 3980 3981 1 0 0 3982 2 0 3 P0 3983 ------- 3984 4 5 6 P1 3985 3986 Process0 [P0]: rows_owned=[0,1] 3987 i = {0,1,3} [size = nrow+1 = 2+1] 3988 j = {0,0,2} [size = nz = 6] 3989 v = {1,2,3} [size = nz = 6] 3990 3991 Process1 [P1]: rows_owned=[2] 3992 i = {0,3} [size = nrow+1 = 1+1] 3993 j = {0,1,2} [size = nz = 6] 3994 v = {4,5,6} [size = nz = 6] 3995 3996 .keywords: matrix, aij, compressed row, sparse, parallel 3997 3998 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3999 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4000 @*/ 4001 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4002 { 4003 PetscErrorCode ierr; 4004 4005 PetscFunctionBegin; 4006 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4007 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4008 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4009 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4010 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4011 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4012 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4013 PetscFunctionReturn(0); 4014 } 4015 4016 #undef __FUNCT__ 4017 #define __FUNCT__ "MatCreateAIJ" 4018 /*@C 4019 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4020 (the default parallel PETSc format). For good matrix assembly performance 4021 the user should preallocate the matrix storage by setting the parameters 4022 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4023 performance can be increased by more than a factor of 50. 4024 4025 Collective on MPI_Comm 4026 4027 Input Parameters: 4028 + comm - MPI communicator 4029 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4030 This value should be the same as the local size used in creating the 4031 y vector for the matrix-vector product y = Ax. 4032 . n - This value should be the same as the local size used in creating the 4033 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4034 calculated if N is given) For square matrices n is almost always m. 4035 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4036 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4037 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4038 (same value is used for all local rows) 4039 . d_nnz - array containing the number of nonzeros in the various rows of the 4040 DIAGONAL portion of the local submatrix (possibly different for each row) 4041 or NULL, if d_nz is used to specify the nonzero structure. 4042 The size of this array is equal to the number of local rows, i.e 'm'. 4043 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4044 submatrix (same value is used for all local rows). 4045 - o_nnz - array containing the number of nonzeros in the various rows of the 4046 OFF-DIAGONAL portion of the local submatrix (possibly different for 4047 each row) or NULL, if o_nz is used to specify the nonzero 4048 structure. 
The size of this array is equal to the number 4049 of local rows, i.e 'm'. 4050 4051 Output Parameter: 4052 . A - the matrix 4053 4054 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4055 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4056 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4057 4058 Notes: 4059 If the *_nnz parameter is given then the *_nz parameter is ignored 4060 4061 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4062 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4063 storage requirements for this matrix. 4064 4065 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4066 processor than it must be used on all processors that share the object for 4067 that argument. 4068 4069 The user MUST specify either the local or global matrix dimensions 4070 (possibly both). 4071 4072 The parallel matrix is partitioned across processors such that the 4073 first m0 rows belong to process 0, the next m1 rows belong to 4074 process 1, the next m2 rows belong to process 2 etc.. where 4075 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4076 values corresponding to [m x N] submatrix. 4077 4078 The columns are logically partitioned with the n0 columns belonging 4079 to 0th partition, the next n1 columns belonging to the next 4080 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4081 4082 The DIAGONAL portion of the local submatrix on any given processor 4083 is the submatrix corresponding to the rows and columns m,n 4084 corresponding to the given processor. i.e diagonal matrix on 4085 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4086 etc. The remaining portion of the local submatrix [m x (N-n)] 4087 constitute the OFF-DIAGONAL portion. The example below better 4088 illustrates this concept. 4089 4090 For a square global matrix we define each processor's diagonal portion 4091 to be its local rows and the corresponding columns (a square submatrix); 4092 each processor's off-diagonal portion encompasses the remainder of the 4093 local matrix (a rectangular submatrix). 4094 4095 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4096 4097 When calling this routine with a single process communicator, a matrix of 4098 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4099 type of communicator, use the construction mechanism: 4100 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4101 4102 By default, this format uses inodes (identical nodes) when possible. 4103 We search for consecutive rows with the same nonzero structure, thereby 4104 reusing matrix information to achieve increased efficiency. 4105 4106 Options Database Keys: 4107 + -mat_no_inode - Do not use inodes 4108 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4109 - -mat_aij_oneindex - Internally use indexing starting at 1 4110 rather than 0. Note that when calling MatSetValues(), 4111 the user still MUST index entries starting at 0! 4112 4113 4114 Example usage: 4115 4116 Consider the following 8x8 matrix with 34 non-zero values, that is 4117 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4118 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4119 as follows: 4120 4121 .vb 4122 1 2 0 | 0 3 0 | 0 4 4123 Proc0 0 5 6 | 7 0 0 | 8 0 4124 9 0 10 | 11 0 0 | 12 0 4125 ------------------------------------- 4126 13 0 14 | 15 16 17 | 0 0 4127 Proc1 0 18 0 | 19 20 21 | 0 0 4128 0 0 0 | 22 23 0 | 24 0 4129 ------------------------------------- 4130 Proc2 25 26 27 | 0 0 28 | 29 0 4131 30 0 0 | 31 32 33 | 0 34 4132 .ve 4133 4134 This can be represented as a collection of submatrices as: 4135 4136 .vb 4137 A B C 4138 D E F 4139 G H I 4140 .ve 4141 4142 Where the submatrices A,B,C are owned by proc0, D,E,F are 4143 owned by proc1, G,H,I are owned by proc2. 4144 4145 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4146 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4147 The 'M','N' parameters are 8,8, and have the same values on all procs. 4148 4149 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4150 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4151 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4152 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4153 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4154 matrix, ans [DF] as another SeqAIJ matrix. 4155 4156 When d_nz, o_nz parameters are specified, d_nz storage elements are 4157 allocated for every row of the local diagonal submatrix, and o_nz 4158 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4159 One way to choose d_nz and o_nz is to use the max nonzerors per local 4160 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4161 In this case, the values of d_nz,o_nz are: 4162 .vb 4163 proc0 : dnz = 2, o_nz = 2 4164 proc1 : dnz = 3, o_nz = 2 4165 proc2 : dnz = 1, o_nz = 4 4166 .ve 4167 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4168 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4169 for proc3. i.e we are using 12+15+10=37 storage locations to store 4170 34 values. 4171 4172 When d_nnz, o_nnz parameters are specified, the storage is specified 4173 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4174 In the above case the values for d_nnz,o_nnz are: 4175 .vb 4176 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4177 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4178 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4179 .ve 4180 Here the space allocated is sum of all the above values i.e 34, and 4181 hence pre-allocation is perfect. 
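   A call sketch for the example above (illustrative only; each process passes its own local sizes
   and nnz arrays, e.g. on proc0 m = n = 3, d_nnz = {2,2,2}, o_nnz = {2,2,2}):
.vb
     Mat A;
     MatCreateAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve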
4182 4183 Level: intermediate 4184 4185 .keywords: matrix, aij, compressed row, sparse, parallel 4186 4187 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4188 MPIAIJ, MatCreateMPIAIJWithArrays() 4189 @*/ 4190 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4191 { 4192 PetscErrorCode ierr; 4193 PetscMPIInt size; 4194 4195 PetscFunctionBegin; 4196 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4197 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4199 if (size > 1) { 4200 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4201 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4202 } else { 4203 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4204 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4205 } 4206 PetscFunctionReturn(0); 4207 } 4208 4209 #undef __FUNCT__ 4210 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4211 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4212 { 4213 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4214 4215 PetscFunctionBegin; 4216 if (Ad) *Ad = a->A; 4217 if (Ao) *Ao = a->B; 4218 if (colmap) *colmap = a->garray; 4219 PetscFunctionReturn(0); 4220 } 4221 4222 #undef __FUNCT__ 4223 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4224 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4225 { 4226 PetscErrorCode ierr; 4227 PetscInt i; 4228 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4229 4230 PetscFunctionBegin; 4231 if (coloring->ctype == IS_COLORING_GLOBAL) { 4232 ISColoringValue *allcolors,*colors; 4233 ISColoring ocoloring; 4234 4235 /* set coloring for diagonal portion */ 4236 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4237 4238 /* set coloring for off-diagonal portion */ 4239 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4240 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4241 for (i=0; i<a->B->cmap->n; i++) { 4242 colors[i] = allcolors[a->garray[i]]; 4243 } 4244 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4245 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4246 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4247 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4248 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4249 ISColoringValue *colors; 4250 PetscInt *larray; 4251 ISColoring ocoloring; 4252 4253 /* set coloring for diagonal portion */ 4254 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4255 for (i=0; i<a->A->cmap->n; i++) { 4256 larray[i] = i + A->cmap->rstart; 4257 } 4258 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4259 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4260 for (i=0; i<a->A->cmap->n; i++) { 4261 colors[i] = coloring->colors[larray[i]]; 4262 } 4263 ierr = PetscFree(larray);CHKERRQ(ierr); 4264 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4265 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4266 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4267 4268 /* set coloring for off-diagonal portion */ 4269 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4270 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4271 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4272 for (i=0; i<a->B->cmap->n; i++) { 4273 colors[i] = coloring->colors[larray[i]]; 4274 } 4275 ierr = PetscFree(larray);CHKERRQ(ierr); 4276 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4277 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4278 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4279 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4280 PetscFunctionReturn(0); 4281 } 4282 4283 #undef __FUNCT__ 4284 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4285 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4286 { 4287 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4288 PetscErrorCode ierr; 4289 4290 PetscFunctionBegin; 4291 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4292 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4293 PetscFunctionReturn(0); 4294 } 4295 4296 #undef __FUNCT__ 4297 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4298 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4299 { 4300 PetscErrorCode ierr; 4301 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4302 PetscInt *indx; 4303 4304 PetscFunctionBegin; 4305 /* This routine will ONLY return MPIAIJ type matrix */ 4306 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4307 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4308 if (n == PETSC_DECIDE) { 4309 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4310 } 4311 /* Check sum(n) = N */ 4312 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4313 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4314 4315 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4316 rstart -= m; 4317 4318 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4319 for (i=0; i<m; i++) { 4320 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4321 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4322 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4323 } 4324 4325 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4326 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4327 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4328 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4329 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4330 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4331 PetscFunctionReturn(0); 4332 } 4333 4334 #undef __FUNCT__ 4335 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4336 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4337 { 4338 PetscErrorCode ierr; 4339 PetscInt m,N,i,rstart,nnz,Ii; 4340 PetscInt *indx; 4341 PetscScalar *values; 4342 4343 PetscFunctionBegin; 4344 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4345 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4346 for (i=0; i<m; i++) { 4347 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4348 Ii = i + rstart; 4349 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4350 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4351 } 4352 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4353 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4354 PetscFunctionReturn(0); 4355 } 4356 4357 #undef __FUNCT__ 4358 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4359 /*@ 4360 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4361 matrices from each processor 4362 4363 Collective on MPI_Comm 4364 4365 Input Parameters: 4366 + comm - the communicators the parallel matrix will live on 4367 . inmat - the input sequential matrices 4368 . n - number of local columns (or PETSC_DECIDE) 4369 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4370 4371 Output Parameter: 4372 . outmat - the parallel matrix generated 4373 4374 Level: advanced 4375 4376 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4377 4378 @*/ 4379 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4380 { 4381 PetscErrorCode ierr; 4382 PetscMPIInt size; 4383 4384 PetscFunctionBegin; 4385 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4386 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4387 if (size == 1) { 4388 if (scall == MAT_INITIAL_MATRIX) { 4389 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4390 } else { 4391 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4392 } 4393 } else { 4394 if (scall == MAT_INITIAL_MATRIX) { 4395 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4396 } 4397 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4398 } 4399 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4400 PetscFunctionReturn(0); 4401 } 4402 4403 #undef __FUNCT__ 4404 #define __FUNCT__ "MatFileSplit" 4405 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4406 { 4407 PetscErrorCode ierr; 4408 PetscMPIInt rank; 4409 PetscInt m,N,i,rstart,nnz; 4410 size_t len; 4411 const PetscInt *indx; 4412 PetscViewer out; 4413 char *name; 4414 Mat B; 4415 const PetscScalar *values; 4416 4417 PetscFunctionBegin; 4418 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4419 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4420 /* Should this be the type of the diagonal block of A? 
*/ 4421 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4422 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4423 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4424 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4425 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4426 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4427 for (i=0; i<m; i++) { 4428 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4429 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4430 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4431 } 4432 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4433 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4434 4435 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4436 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4437 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4438 sprintf(name,"%s.%d",outfile,rank); 4439 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4440 ierr = PetscFree(name);CHKERRQ(ierr); 4441 ierr = MatView(B,out);CHKERRQ(ierr); 4442 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4443 ierr = MatDestroy(&B);CHKERRQ(ierr); 4444 PetscFunctionReturn(0); 4445 } 4446 4447 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4448 #undef __FUNCT__ 4449 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4450 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4451 { 4452 PetscErrorCode ierr; 4453 Mat_Merge_SeqsToMPI *merge; 4454 PetscContainer container; 4455 4456 PetscFunctionBegin; 4457 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4458 if (container) { 4459 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4460 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4461 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4462 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4472 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4473 ierr = PetscFree(merge);CHKERRQ(ierr); 4474 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4475 } 4476 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4477 PetscFunctionReturn(0); 4478 } 4479 4480 #include <../src/mat/utils/freespace.h> 4481 #include <petscbt.h> 4482 4483 #undef __FUNCT__ 4484 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4485 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4486 { 4487 PetscErrorCode ierr; 4488 MPI_Comm comm; 4489 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4490 PetscMPIInt size,rank,taga,*len_s; 4491 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4492 PetscInt proc,m; 4493 PetscInt **buf_ri,**buf_rj; 4494 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4495 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4496 MPI_Request *s_waits,*r_waits; 4497 MPI_Status *status; 4498 MatScalar *aa=a->a; 4499 MatScalar **abuf_r,*ba_i; 4500 Mat_Merge_SeqsToMPI *merge; 4501 PetscContainer container; 4502 4503 PetscFunctionBegin; 4504 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4505 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4506 4507 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4508 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4509 4510 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4511 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4512 4513 bi = merge->bi; 4514 bj = merge->bj; 4515 buf_ri = merge->buf_ri; 4516 buf_rj = merge->buf_rj; 4517 4518 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4519 owners = merge->rowmap->range; 4520 len_s = merge->len_s; 4521 4522 /* send and recv matrix values */ 4523 /*-----------------------------*/ 4524 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4525 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4526 4527 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4528 for (proc=0,k=0; proc<size; proc++) { 4529 if (!len_s[proc]) continue; 4530 i = owners[proc]; 4531 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4532 k++; 4533 } 4534 4535 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4536 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4537 ierr = PetscFree(status);CHKERRQ(ierr); 4538 4539 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4540 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4541 4542 /* insert mat values of mpimat */ 4543 /*----------------------------*/ 4544 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4545 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4546 4547 for (k=0; k<merge->nrecv; k++) { 4548 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4549 nrows = *(buf_ri_k[k]); 4550 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4551 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4552 } 4553 4554 /* set values of ba */ 4555 m = merge->rowmap->n; 4556 for (i=0; i<m; i++) { 4557 arow = owners[rank] + i; 4558 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4559 bnzi = bi[i+1] - bi[i]; 4560 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4561 4562 /* add local non-zero vals of this proc's seqmat into ba */ 4563 anzi = ai[arow+1] - ai[arow]; 4564 aj = a->j + ai[arow]; 4565 aa = a->a + ai[arow]; 4566 nextaj = 0; 4567 for (j=0; nextaj<anzi; j++) { 4568 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4569 ba_i[j] += aa[nextaj++]; 4570 } 4571 } 4572 4573 /* add received vals into ba */ 4574 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4575 /* i-th row */ 4576 if (i == *nextrow[k]) { 4577 anzi = *(nextai[k]+1) - *nextai[k]; 4578 aj = buf_rj[k] + *(nextai[k]); 4579 aa = abuf_r[k] + *(nextai[k]); 4580 nextaj = 0; 4581 for (j=0; nextaj<anzi; j++) { 4582 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4583 ba_i[j] += aa[nextaj++]; 4584 } 4585 } 4586 nextrow[k]++; nextai[k]++; 4587 } 4588 } 4589 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4590 } 4591 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4592 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4593 4594 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4595 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4596 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4597 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4598 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4599 PetscFunctionReturn(0); 4600 } 4601 4602 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4603 4604 #undef __FUNCT__ 4605 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4606 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4607 { 4608 PetscErrorCode ierr; 4609 Mat B_mpi; 4610 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4611 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4612 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4613 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4614 PetscInt len,proc,*dnz,*onz,bs,cbs; 4615 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4616 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4617 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4618 MPI_Status *status; 4619 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4620 PetscBT lnkbt; 4621 Mat_Merge_SeqsToMPI *merge; 4622 PetscContainer container; 4623 4624 PetscFunctionBegin; 4625 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4626 4627 /* make sure it is a PETSc comm */ 4628 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4629 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4630 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4631 4632 ierr = PetscNew(&merge);CHKERRQ(ierr); 4633 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4634 4635 /* determine row ownership */ 4636 /*---------------------------------------------------------*/ 4637 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4638 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4639 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4640 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4641 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4642 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4643 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4644 4645 m = merge->rowmap->n; 4646 owners = merge->rowmap->range; 4647 4648 /* determine the number of messages to send, their lengths */ 4649 /*---------------------------------------------------------*/ 4650 len_s = merge->len_s; 4651 4652 len = 0; /* length of buf_si[] */ 4653 merge->nsend = 0; 4654 for (proc=0; proc<size; proc++) { 4655 len_si[proc] = 0; 4656 if (proc == rank) { 4657 len_s[proc] = 0; 4658 } else { 4659 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4660 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4661 } 4662 if (len_s[proc]) { 4663 merge->nsend++; 4664 nrows = 0; 4665 for (i=owners[proc]; i<owners[proc+1]; i++) { 4666 if (ai[i+1] > ai[i]) nrows++; 4667 } 4668 len_si[proc] = 2*(nrows+1); 4669 len += len_si[proc]; 4670 } 4671 } 4672 4673 /* determine the number and length of messages to receive for ij-structure */ 4674 /*-------------------------------------------------------------------------*/ 4675 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4676 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4677 4678 /* post the Irecv of j-structure */ 4679 /*-------------------------------*/ 4680 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4681 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4682 4683 /* post the Isend of j-structure */ 4684 /*--------------------------------*/ 4685 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4686 4687 for (proc=0, k=0; proc<size; proc++) { 4688 if (!len_s[proc]) continue; 4689 i = owners[proc]; 4690 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4691 k++; 4692 } 4693 4694 /* receives and sends of j-structure are complete */ 4695 /*------------------------------------------------*/ 4696 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4697 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4698 4699 /* send and recv i-structure */ 4700 /*---------------------------*/ 4701 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4702 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4703 4704 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4705 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4706 for (proc=0,k=0; proc<size; proc++) { 4707 if (!len_s[proc]) continue; 4708 /* form outgoing message for i-structure: 4709 buf_si[0]: nrows to be sent 4710 [1:nrows]: row index (global) 4711 [nrows+1:2*nrows+1]: i-structure index 4712 */ 4713 /*-------------------------------------------*/ 4714 nrows = len_si[proc]/2 - 1; 4715 buf_si_i = buf_si + nrows+1; 4716 buf_si[0] = nrows; 4717 buf_si_i[0] = 0; 4718 nrows = 0; 4719 for (i=owners[proc]; i<owners[proc+1]; i++) { 4720 anzi = ai[i+1] - ai[i]; 4721 if (anzi) { 4722 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4723 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4724 nrows++; 4725 } 4726 } 4727 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4728 k++; 4729 buf_si += len_si[proc]; 4730 } 4731 4732 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4733 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4734 4735 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4736 for (i=0; i<merge->nrecv; i++) { 4737 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4738 } 4739 4740 ierr = PetscFree(len_si);CHKERRQ(ierr); 4741 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4742 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4743 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4744 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4745 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4746 ierr = PetscFree(status);CHKERRQ(ierr); 4747 4748 /* compute a local seq matrix in each processor */ 4749 /*----------------------------------------------*/ 4750 /* allocate bi array and free space for accumulating nonzero column info */ 4751 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4752 bi[0] = 0; 4753 4754 /* create and initialize a linked list */ 4755 nlnk = N+1; 4756 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4757 4758 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4759 len = ai[owners[rank+1]] - ai[owners[rank]]; 4760 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4761 4762 current_space = free_space; 4763 4764 /* determine symbolic info for each local row */ 4765 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4766 4767 for (k=0; k<merge->nrecv; k++) { 4768 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4769 nrows = *buf_ri_k[k]; 4770 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4771 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4772 } 4773 4774 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4775 len = 0; 4776 for (i=0; i<m; i++) { 4777 bnzi = 0; 4778 /* add local non-zero cols of this proc's seqmat into lnk */ 4779 arow = owners[rank] + i; 4780 anzi = ai[arow+1] - ai[arow]; 4781 aj = a->j + ai[arow]; 4782 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4783 bnzi += nlnk; 4784 /* add received col data into lnk */ 4785 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4786 if (i == *nextrow[k]) { /* i-th row */ 4787 anzi = *(nextai[k]+1) - *nextai[k]; 4788 aj = buf_rj[k] + *nextai[k]; 4789 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4790 bnzi += nlnk; 4791 nextrow[k]++; nextai[k]++; 4792 } 4793 } 4794 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4795 4796 /* if free space is not available, make more free space */ 4797 if (current_space->local_remaining<bnzi) { 4798 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4799 nspacedouble++; 4800 } 4801 /* copy data into free space, then initialize lnk */ 4802 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4803 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4804 4805 current_space->array += bnzi; 4806 current_space->local_used += bnzi; 4807 current_space->local_remaining -= bnzi; 4808 4809 bi[i+1] = bi[i] + bnzi; 4810 } 4811 4812 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4813 4814 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4815 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4816 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4817 4818 /* create symbolic parallel matrix B_mpi */ 4819 /*---------------------------------------*/ 4820 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4821 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4822 if (n==PETSC_DECIDE) { 4823 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4824 } else { 4825 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4826 } 4827 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4828 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4829 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4830 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4831 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4832 4833 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4834 B_mpi->assembled = PETSC_FALSE; 4835 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4836 merge->bi = bi; 4837 merge->bj = bj; 4838 merge->buf_ri = buf_ri; 4839 merge->buf_rj = buf_rj; 4840 merge->coi = NULL; 4841 merge->coj = NULL; 4842 merge->owners_co = NULL; 4843 4844 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4845 4846 /* attach the supporting struct to B_mpi for reuse */ 4847 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4848 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4849 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4850 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4851 *mpimat = B_mpi; 4852 4853 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4854 PetscFunctionReturn(0); 4855 } 4856 4857 #undef __FUNCT__ 4858 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4859 /*@C 4860 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4861 matrices from each processor 4862 4863 Collective on MPI_Comm 4864 4865 Input Parameters: 4866 + comm - the communicators the parallel matrix will live on 4867 . seqmat - the input sequential matrices 4868 . m - number of local rows (or PETSC_DECIDE) 4869 . n - number of local columns (or PETSC_DECIDE) 4870 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4871 4872 Output Parameter: 4873 . mpimat - the parallel matrix generated 4874 4875 Level: advanced 4876 4877 Notes: 4878 The dimensions of the sequential matrix in each processor MUST be the same. 4879 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4880 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4881 @*/ 4882 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4883 { 4884 PetscErrorCode ierr; 4885 PetscMPIInt size; 4886 4887 PetscFunctionBegin; 4888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4889 if (size == 1) { 4890 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4891 if (scall == MAT_INITIAL_MATRIX) { 4892 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4893 } else { 4894 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4895 } 4896 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4897 PetscFunctionReturn(0); 4898 } 4899 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4900 if (scall == MAT_INITIAL_MATRIX) { 4901 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4902 } 4903 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4904 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4905 PetscFunctionReturn(0); 4906 } 4907 4908 #undef __FUNCT__ 4909 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4910 /*@ 4911 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4912 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4913 with MatGetSize() 4914 4915 Not Collective 4916 4917 Input Parameters: 4918 + A - the matrix 4919 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4920 4921 Output Parameter: 4922 . 
A_loc - the local sequential matrix generated 4923 4924 Level: developer 4925 4926 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4927 4928 @*/ 4929 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4930 { 4931 PetscErrorCode ierr; 4932 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4933 Mat_SeqAIJ *mat,*a,*b; 4934 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4935 MatScalar *aa,*ba,*cam; 4936 PetscScalar *ca; 4937 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4938 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4939 PetscBool match; 4940 MPI_Comm comm; 4941 PetscMPIInt size; 4942 4943 PetscFunctionBegin; 4944 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4945 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4946 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4947 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4948 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4949 4950 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4951 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4952 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4953 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4954 aa = a->a; ba = b->a; 4955 if (scall == MAT_INITIAL_MATRIX) { 4956 if (size == 1) { 4957 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4958 PetscFunctionReturn(0); 4959 } 4960 4961 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4962 ci[0] = 0; 4963 for (i=0; i<am; i++) { 4964 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4965 } 4966 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4967 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4968 k = 0; 4969 for (i=0; i<am; i++) { 4970 ncols_o = bi[i+1] - bi[i]; 4971 ncols_d = ai[i+1] - ai[i]; 4972 /* off-diagonal portion of A */ 4973 for (jo=0; jo<ncols_o; jo++) { 4974 col = cmap[*bj]; 4975 if (col >= cstart) break; 4976 cj[k] = col; bj++; 4977 ca[k++] = *ba++; 4978 } 4979 /* diagonal portion of A */ 4980 for (j=0; j<ncols_d; j++) { 4981 cj[k] = cstart + *aj++; 4982 ca[k++] = *aa++; 4983 } 4984 /* off-diagonal portion of A */ 4985 for (j=jo; j<ncols_o; j++) { 4986 cj[k] = cmap[*bj++]; 4987 ca[k++] = *ba++; 4988 } 4989 } 4990 /* put together the new matrix */ 4991 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4992 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4993 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4994 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4995 mat->free_a = PETSC_TRUE; 4996 mat->free_ij = PETSC_TRUE; 4997 mat->nonew = 0; 4998 } else if (scall == MAT_REUSE_MATRIX) { 4999 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5000 ci = mat->i; cj = mat->j; cam = mat->a; 5001 for (i=0; i<am; i++) { 5002 /* off-diagonal portion of A */ 5003 ncols_o = bi[i+1] - bi[i]; 5004 for (jo=0; jo<ncols_o; jo++) { 5005 col = cmap[*bj]; 5006 if (col >= cstart) break; 5007 *cam++ = *ba++; bj++; 5008 } 5009 /* diagonal portion of A */ 5010 ncols_d = ai[i+1] - ai[i]; 5011 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5012 /* off-diagonal portion of A */ 5013 for (j=jo; j<ncols_o; j++) { 5014 *cam++ = *ba++; bj++; 5015 } 5016 } 5017 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5018 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5019 PetscFunctionReturn(0); 5020 } 5021 5022 #undef __FUNCT__ 5023 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5024 /*@C 5025 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5026 5027 Not Collective 5028 5029 Input Parameters: 5030 + A - the matrix 5031 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5032 - row, col - index sets of rows and columns to extract (or NULL) 5033 5034 Output Parameter: 5035 . A_loc - the local sequential matrix generated 5036 5037 Level: developer 5038 5039 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5040 5041 @*/ 5042 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5043 { 5044 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5045 PetscErrorCode ierr; 5046 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5047 IS isrowa,iscola; 5048 Mat *aloc; 5049 PetscBool match; 5050 5051 PetscFunctionBegin; 5052 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5053 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5054 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5055 if (!row) { 5056 start = A->rmap->rstart; end = A->rmap->rend; 5057 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5058 } else { 5059 isrowa = *row; 5060 } 5061 if (!col) { 5062 start = A->cmap->rstart; 5063 cmap = a->garray; 5064 nzA = a->A->cmap->n; 5065 nzB = a->B->cmap->n; 5066 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5067 ncols = 0; 5068 for (i=0; i<nzB; i++) { 5069 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5070 else break; 5071 } 5072 imark = i; 5073 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5074 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5075 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5076 } else { 5077 iscola = *col; 5078 } 5079 if (scall != MAT_INITIAL_MATRIX) { 5080 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5081 aloc[0] = *A_loc; 5082 } 5083 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5084 *A_loc = aloc[0]; 5085 ierr = PetscFree(aloc);CHKERRQ(ierr); 5086 if (!row) { 5087 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5088 } 5089 if (!col) { 5090 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5091 } 5092 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 5096 #undef __FUNCT__ 5097 #define __FUNCT__ "MatGetBrowsOfAcols" 5098 /*@C 5099 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 5100 5101 Collective on Mat 5102 5103 Input Parameters: 5104 + A,B - the matrices in mpiaij format 5105 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5106 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5107 5108 Output Parameter: 5109 + rowb, colb - index sets of rows and columns of B to extract 5110 - B_seq - the sequential matrix generated 5111 5112 Level: developer 5113 5114 @*/ 5115 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5116 { 5117 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5118 PetscErrorCode ierr; 5119 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5120 IS isrowb,iscolb; 5121 Mat *bseq=NULL; 5122 5123 PetscFunctionBegin; 5124 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5125 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5126 } 5127 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5128 5129 if (scall == MAT_INITIAL_MATRIX) { 5130 start = A->cmap->rstart; 5131 cmap = a->garray; 5132 nzA = a->A->cmap->n; 5133 nzB = a->B->cmap->n; 5134 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5135 ncols = 0; 5136 for (i=0; i<nzB; i++) { /* row < local row index */ 5137 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5138 else break; 5139 } 5140 imark = i; 5141 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5142 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5143 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5144 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5145 } else { 5146 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5147 isrowb = *rowb; iscolb = *colb; 5148 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5149 bseq[0] = *B_seq; 5150 } 5151 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5152 *B_seq = bseq[0]; 5153 ierr = PetscFree(bseq);CHKERRQ(ierr); 5154 if (!rowb) { 5155 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5156 } else { 5157 *rowb = isrowb; 5158 } 5159 if (!colb) { 5160 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5161 } else { 5162 *colb = iscolb; 5163 } 5164 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5165 PetscFunctionReturn(0); 5166 } 5167 5168 #undef __FUNCT__ 5169 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5170 /* 5171 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5172 of the OFF-DIAGONAL portion of local A 5173 5174 Collective on Mat 5175 5176 Input Parameters: 5177 + A,B - the matrices in mpiaij format 5178 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5179 5180 Output Parameter: 5181 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5182 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5183 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5184 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5185 5186 Level: developer 5187 5188 */ 5189 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5190 { 5191 VecScatter_MPI_General *gen_to,*gen_from; 5192 PetscErrorCode ierr; 5193 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5194 Mat_SeqAIJ *b_oth; 5195 VecScatter ctx =a->Mvctx; 5196 MPI_Comm comm; 5197 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5198 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5199 PetscScalar *rvalues,*svalues; 5200 MatScalar *b_otha,*bufa,*bufA; 5201 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5202 MPI_Request *rwaits = NULL,*swaits = NULL; 5203 MPI_Status *sstatus,rstatus; 5204 PetscMPIInt jj,size; 5205 PetscInt *cols,sbs,rbs; 5206 PetscScalar *vals; 5207 5208 PetscFunctionBegin; 5209 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5210 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5211 if (size == 1) PetscFunctionReturn(0); 5212 5213 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5214 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5215 } 5216 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5217 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5218 5219 gen_to = (VecScatter_MPI_General*)ctx->todata; 5220 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5221 rvalues = gen_from->values; /* holds the length of receiving row */ 5222 svalues = gen_to->values; /* holds the length of sending row */ 5223 nrecvs = gen_from->n; 5224 nsends = gen_to->n; 5225 5226 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5227 srow = gen_to->indices; /* local row index to be sent */ 5228 sstarts = gen_to->starts; 5229 sprocs = gen_to->procs; 5230 sstatus = gen_to->sstatus; 5231 sbs = gen_to->bs; 5232 rstarts = gen_from->starts; 5233 rprocs = gen_from->procs; 5234 rbs = gen_from->bs; 5235 5236 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5237 if (scall == MAT_INITIAL_MATRIX) { 5238 /* i-array */ 5239 /*---------*/ 5240 /* post receives */ 5241 for (i=0; i<nrecvs; i++) { 5242 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5243 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5244 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5245 } 5246 5247 /* pack the outgoing message */ 5248 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5249 5250 sstartsj[0] = 0; 5251 rstartsj[0] = 0; 5252 len = 0; /* total length of j or a array to be sent */ 5253 k = 0; 5254 for (i=0; i<nsends; i++) { 5255 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5256 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5257 for (j=0; j<nrows; j++) { 5258 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5259 for (l=0; l<sbs; l++) { 5260 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5261 5262 rowlen[j*sbs+l] = ncols; 5263 5264 len += ncols; 5265 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5266 } 5267 k++; 5268 } 5269 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5270 5271 
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5272 } 5273 /* recvs and sends of i-array are completed */ 5274 i = nrecvs; 5275 while (i--) { 5276 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5277 } 5278 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5279 5280 /* allocate buffers for sending j and a arrays */ 5281 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5282 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5283 5284 /* create i-array of B_oth */ 5285 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5286 5287 b_othi[0] = 0; 5288 len = 0; /* total length of j or a array to be received */ 5289 k = 0; 5290 for (i=0; i<nrecvs; i++) { 5291 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5292 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5293 for (j=0; j<nrows; j++) { 5294 b_othi[k+1] = b_othi[k] + rowlen[j]; 5295 len += rowlen[j]; k++; 5296 } 5297 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5298 } 5299 5300 /* allocate space for j and a arrrays of B_oth */ 5301 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5302 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5303 5304 /* j-array */ 5305 /*---------*/ 5306 /* post receives of j-array */ 5307 for (i=0; i<nrecvs; i++) { 5308 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5309 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5310 } 5311 5312 /* pack the outgoing message j-array */ 5313 k = 0; 5314 for (i=0; i<nsends; i++) { 5315 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5316 bufJ = bufj+sstartsj[i]; 5317 for (j=0; j<nrows; j++) { 5318 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5319 for (ll=0; ll<sbs; ll++) { 5320 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5321 for (l=0; l<ncols; l++) { 5322 *bufJ++ = cols[l]; 5323 } 5324 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5325 } 5326 } 5327 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5328 } 5329 5330 /* recvs and sends of j-array are completed */ 5331 i = nrecvs; 5332 while (i--) { 5333 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5334 } 5335 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5336 } else if (scall == MAT_REUSE_MATRIX) { 5337 sstartsj = *startsj_s; 5338 rstartsj = *startsj_r; 5339 bufa = *bufa_ptr; 5340 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5341 b_otha = b_oth->a; 5342 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5343 5344 /* a-array */ 5345 /*---------*/ 5346 /* post receives of a-array */ 5347 for (i=0; i<nrecvs; i++) { 5348 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5349 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5350 } 5351 5352 /* pack the outgoing message a-array */ 5353 k = 0; 5354 for (i=0; i<nsends; i++) { 5355 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5356 bufA = bufa+sstartsj[i]; 5357 for (j=0; j<nrows; j++) { 5358 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5359 for (ll=0; ll<sbs; ll++) { 5360 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5361 for (l=0; l<ncols; l++) { 5362 *bufA++ = vals[l]; 5363 } 5364 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5365 } 5366 } 5367 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5368 } 5369 /* recvs and sends of a-array are completed */ 5370 i = nrecvs; 5371 while (i--) { 5372 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5373 } 5374 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5375 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5376 5377 if (scall == MAT_INITIAL_MATRIX) { 5378 /* put together the new matrix */ 5379 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5380 5381 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5382 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5383 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5384 b_oth->free_a = PETSC_TRUE; 5385 b_oth->free_ij = PETSC_TRUE; 5386 b_oth->nonew = 0; 5387 5388 ierr = PetscFree(bufj);CHKERRQ(ierr); 5389 if (!startsj_s || !bufa_ptr) { 5390 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5391 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5392 } else { 5393 *startsj_s = sstartsj; 5394 *startsj_r = rstartsj; 5395 *bufa_ptr = bufa; 5396 } 5397 } 5398 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5399 PetscFunctionReturn(0); 5400 } 5401 5402 #undef __FUNCT__ 5403 #define __FUNCT__ "MatGetCommunicationStructs" 5404 /*@C 5405 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5406 5407 Not Collective 5408 5409 Input Parameters: 5410 . A - The matrix in mpiaij format 5411 5412 Output Parameter: 5413 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5414 . 
colmap - A map from global column index to local index into lvec 5415 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5416 5417 Level: developer 5418 5419 @*/ 5420 #if defined(PETSC_USE_CTABLE) 5421 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5422 #else 5423 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5424 #endif 5425 { 5426 Mat_MPIAIJ *a; 5427 5428 PetscFunctionBegin; 5429 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5430 PetscValidPointer(lvec, 2); 5431 PetscValidPointer(colmap, 3); 5432 PetscValidPointer(multScatter, 4); 5433 a = (Mat_MPIAIJ*) A->data; 5434 if (lvec) *lvec = a->lvec; 5435 if (colmap) *colmap = a->colmap; 5436 if (multScatter) *multScatter = a->Mvctx; 5437 PetscFunctionReturn(0); 5438 } 5439 5440 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5441 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5442 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5443 5444 #undef __FUNCT__ 5445 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5446 /* 5447 Computes (B'*A')' since computing B*A directly is untenable 5448 5449 n p p 5450 ( ) ( ) ( ) 5451 m ( A ) * n ( B ) = m ( C ) 5452 ( ) ( ) ( ) 5453 5454 */ 5455 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5456 { 5457 PetscErrorCode ierr; 5458 Mat At,Bt,Ct; 5459 5460 PetscFunctionBegin; 5461 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5462 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5463 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5464 ierr = MatDestroy(&At);CHKERRQ(ierr); 5465 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5466 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5467 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5468 PetscFunctionReturn(0); 5469 } 5470 5471 #undef __FUNCT__ 5472 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5473 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5474 { 5475 PetscErrorCode ierr; 5476 PetscInt m=A->rmap->n,n=B->cmap->n; 5477 Mat Cmat; 5478 5479 PetscFunctionBegin; 5480 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5481 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5482 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5483 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5484 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5485 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5486 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5487 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5488 5489 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5490 5491 *C = Cmat; 5492 PetscFunctionReturn(0); 5493 } 5494 5495 /* ----------------------------------------------------------------*/ 5496 #undef __FUNCT__ 5497 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5498 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5499 { 5500 PetscErrorCode ierr; 5501 5502 PetscFunctionBegin; 5503 if (scall == MAT_INITIAL_MATRIX) { 5504 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5505 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5506 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5507 } 5508 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5509 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5510 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5511 PetscFunctionReturn(0); 5512 } 5513 5514 #if defined(PETSC_HAVE_MUMPS) 5515 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5516 #endif 5517 #if defined(PETSC_HAVE_PASTIX) 5518 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5519 #endif 5520 #if defined(PETSC_HAVE_SUPERLU_DIST) 5521 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5522 #endif 5523 #if defined(PETSC_HAVE_CLIQUE) 5524 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5525 #endif 5526 5527 /*MC 5528 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5529 5530 Options Database Keys: 5531 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5532 5533 Level: beginner 5534 5535 .seealso: MatCreateAIJ() 5536 M*/ 5537 5538 #undef __FUNCT__ 5539 #define __FUNCT__ "MatCreate_MPIAIJ" 5540 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5541 { 5542 Mat_MPIAIJ *b; 5543 PetscErrorCode ierr; 5544 PetscMPIInt size; 5545 5546 PetscFunctionBegin; 5547 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5548 5549 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5550 B->data = (void*)b; 5551 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5552 B->assembled = PETSC_FALSE; 5553 B->insertmode = NOT_SET_VALUES; 5554 b->size = size; 5555 5556 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5557 5558 /* build cache for off array entries formed */ 5559 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5560 5561 b->donotstash = PETSC_FALSE; 5562 b->colmap = 0; 5563 b->garray = 0; 5564 b->roworiented = PETSC_TRUE; 5565 5566 /* stuff used for matrix vector multiply */ 5567 b->lvec = NULL; 5568 b->Mvctx = NULL; 5569 5570 /* stuff for MatGetRow() */ 5571 b->rowindices = 0; 5572 b->rowvalues = 0; 5573 b->getrowactive = PETSC_FALSE; 5574 5575 /* flexible pointer used in CUSP/CUSPARSE classes */ 5576 b->spptr = NULL; 5577 5578 #if defined(PETSC_HAVE_MUMPS) 5579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5580 #endif 5581 #if defined(PETSC_HAVE_PASTIX) 5582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5583 #endif 5584 #if defined(PETSC_HAVE_SUPERLU_DIST) 5585 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5586 #endif 5587 #if defined(PETSC_HAVE_CLIQUE) 5588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5589 #endif 5590 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5594 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5604 PetscFunctionReturn(0); 5605 } 5606 5607 #undef __FUNCT__ 5608 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5609 /*@C 5610 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5611 and "off-diagonal" part of the matrix in CSR format. 5612 5613 Collective on MPI_Comm 5614 5615 Input Parameters: 5616 + comm - MPI communicator 5617 . m - number of local rows (Cannot be PETSC_DECIDE) 5618 . n - This value should be the same as the local size used in creating the 5619 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5620 calculated if N is given) For square matrices n is almost always m. 5621 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5622 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5623 . i - row indices for "diagonal" portion of matrix 5624 . j - column indices 5625 . a - matrix values 5626 . oi - row indices for "off-diagonal" portion of matrix 5627 . oj - column indices 5628 - oa - matrix values 5629 5630 Output Parameter: 5631 . mat - the matrix 5632 5633 Level: advanced 5634 5635 Notes: 5636 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5637 must free the arrays once the matrix has been destroyed and not before. 5638 5639 The i and j indices are 0 based 5640 5641 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5642 5643 This sets local rows and cannot be used to set off-processor values. 5644 5645 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5646 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5647 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5648 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5649 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5650 communication if it is known that only local entries will be set. 
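
   Example usage, a minimal sketch assuming the caller has already built the six CSR arrays
   (the variable names m,n,i,j,a,oi,oj,oa below are illustrative, not defined by this routine):

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ... use A ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);   note that the i,j,a,oi,oj,oa arrays remain owned by the caller and must be freed by the caller afterwards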
5651 5652 .keywords: matrix, aij, compressed row, sparse, parallel 5653 5654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5655 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5656 C@*/ 5657 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5658 { 5659 PetscErrorCode ierr; 5660 Mat_MPIAIJ *maij; 5661 5662 PetscFunctionBegin; 5663 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5664 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5665 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5666 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5667 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5668 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5669 maij = (Mat_MPIAIJ*) (*mat)->data; 5670 5671 (*mat)->preallocated = PETSC_TRUE; 5672 5673 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5674 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5675 5676 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5677 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5678 5679 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5680 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5681 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5682 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5683 5684 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5685 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5686 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5687 PetscFunctionReturn(0); 5688 } 5689 5690 /* 5691 Special version for direct calls from Fortran 5692 */ 5693 #include <petsc-private/fortranimpl.h> 5694 5695 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5696 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5697 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5698 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5699 #endif 5700 5701 /* Change these macros so can be used in void function */ 5702 #undef CHKERRQ 5703 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5704 #undef SETERRQ2 5705 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5706 #undef SETERRQ3 5707 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5708 #undef SETERRQ 5709 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5710 5711 #undef __FUNCT__ 5712 #define __FUNCT__ "matsetvaluesmpiaij_" 5713 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5714 { 5715 Mat mat = *mmat; 5716 PetscInt m = *mm, n = *mn; 5717 InsertMode addv = *maddv; 5718 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5719 PetscScalar value; 5720 PetscErrorCode ierr; 5721 5722 MatCheckPreallocated(mat,1); 5723 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5724 5725 #if defined(PETSC_USE_DEBUG) 5726 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5727 #endif 5728 { 5729 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5730 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5731 PetscBool roworiented = aij->roworiented; 5732 5733 /* Some Variables required in the macro */ 5734 Mat A = aij->A; 5735 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5736 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5737 MatScalar *aa = a->a; 5738 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5739 Mat B = aij->B; 5740 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5741 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5742 MatScalar *ba = b->a; 5743 5744 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5745 PetscInt nonew = a->nonew; 5746 MatScalar *ap1,*ap2; 5747 5748 PetscFunctionBegin; 5749 for (i=0; i<m; i++) { 5750 if (im[i] < 0) continue; 5751 #if defined(PETSC_USE_DEBUG) 5752 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5753 #endif 5754 if (im[i] >= rstart && im[i] < rend) { 5755 row = im[i] - rstart; 5756 lastcol1 = -1; 5757 rp1 = aj + ai[row]; 5758 ap1 = aa + ai[row]; 5759 rmax1 = aimax[row]; 5760 nrow1 = ailen[row]; 5761 low1 = 0; 5762 high1 = nrow1; 5763 lastcol2 = -1; 5764 rp2 = bj + bi[row]; 5765 ap2 = ba + bi[row]; 5766 rmax2 = bimax[row]; 5767 nrow2 = bilen[row]; 5768 low2 = 0; 5769 high2 = nrow2; 5770 5771 for (j=0; j<n; j++) { 5772 if (roworiented) value = v[i*n+j]; 5773 else value = v[i+j*m]; 5774 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5775 if (in[j] >= cstart && in[j] < cend) { 5776 col = in[j] - cstart; 5777 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5778 } else if (in[j] < 0) continue; 5779 #if defined(PETSC_USE_DEBUG) 5780 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5781 #endif 5782 else { 5783 if (mat->was_assembled) { 5784 if (!aij->colmap) { 5785 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5786 } 5787 #if defined(PETSC_USE_CTABLE) 5788 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5789 col--; 5790 #else 5791 col = aij->colmap[in[j]] - 1; 5792 #endif 5793 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5794 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5795 col = in[j]; 5796 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5797 B = aij->B; 5798 b = (Mat_SeqAIJ*)B->data; 5799 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5800 rp2 = bj + bi[row]; 5801 ap2 = ba + bi[row]; 5802 rmax2 = bimax[row]; 5803 nrow2 = bilen[row]; 5804 low2 = 0; 5805 high2 = nrow2; 5806 bm = aij->B->rmap->n; 5807 ba = b->a; 5808 } 5809 } else col = in[j]; 5810 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5811 } 5812 } 5813 } else if (!aij->donotstash) { 5814 if (roworiented) { 5815 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5816 } else { 5817 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5818 } 5819 } 5820 } 5821 } 5822 PetscFunctionReturnVoid(); 5823 } 5824 5825
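
/*
   Usage sketch for the merge and local-extraction routines defined above. This is illustrative
   only and is not compiled into the library; it assumes every rank holds a SeqAIJ matrix seqmat
   with the full global dimensions containing that rank's contribution, as required by
   MatCreateMPIAIJSumSeqAIJ():

      Mat C,Cloc;
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatMPIAIJGetLocalMat(C,MAT_INITIAL_MATRIX,&Cloc);CHKERRQ(ierr);    local rows by global columns, as SeqAIJ
      ierr = MatDestroy(&Cloc);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/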