1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for (j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselfs, in order that we may reassemble. */ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 1303 PetscFunctionBegin; 1304 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1306 nz = A->nz + B->nz; 1307 if (!rank) { 1308 header[0] = MAT_FILE_CLASSID; 1309 header[1] = mat->rmap->N; 1310 header[2] = mat->cmap->N; 1311 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1312 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1313 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1314 /* get largest number of rows any processor has */ 1315 rlen = mat->rmap->n; 1316 range = mat->rmap->range; 1317 for (i=1; i<size; i++) { 1318 rlen = PetscMax(rlen,range[i+1] - range[i]); 1319 } 1320 } else { 1321 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1322 rlen = mat->rmap->n; 1323 } 1324 1325 /* load up the local row counts */ 1326 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1327 for (i=0; i<mat->rmap->n; i++) { 1328 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1329 } 1330 1331 /* store the row lengths to the file */ 1332 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1333 if (!rank) { 1334 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 for (i=1; i<size; i++) { 1336 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1337 rlen = range[i+1] - range[i]; 1338 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1340 } 1341 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1344 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1348 1349 /* load up the local column indices */ 1350 nzmax = nz; /* )th processor needs space a largest processor needs */ 1351 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1352 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1353 cnt = 0; 1354 for (i=0; i<mat->rmap->n; i++) { 1355 for (j=B->i[i]; j<B->i[i+1]; j++) { 1356 if ( (col = garray[B->j[j]]) > cstart) break; 1357 column_indices[cnt++] = col; 1358 } 1359 for (k=A->i[i]; k<A->i[i+1]; k++) { 1360 column_indices[cnt++] = A->j[k] + cstart; 1361 } 1362 for (; j<B->i[i+1]; j++) { 1363 column_indices[cnt++] = garray[B->j[j]]; 1364 } 1365 } 1366 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1367 1368 /* store the column indices to the file */ 1369 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1370 if (!rank) { 1371 MPI_Status status; 1372 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1373 for (i=1; i<size; i++) { 1374 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1375 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1376 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1377 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1379 } 1380 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1383 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1388 1389 /* load up the local column values */ 1390 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1391 cnt = 0; 1392 for (i=0; i<mat->rmap->n; i++) { 1393 for (j=B->i[i]; j<B->i[i+1]; j++) { 1394 if ( garray[B->j[j]] > cstart) break; 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 for (k=A->i[i]; k<A->i[i+1]; k++) { 1398 column_values[cnt++] = A->a[k]; 1399 } 1400 for (; j<B->i[i+1]; j++) { 1401 column_values[cnt++] = B->a[j]; 1402 } 1403 } 1404 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1405 1406 /* store the column values to the file */ 1407 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1408 if (!rank) { 1409 MPI_Status status; 1410 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 for (i=1; i<size; i++) { 1412 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1413 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1414 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1415 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1417 } 1418 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1419 } else { 1420 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1421 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(column_values);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1432 { 1433 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1434 PetscErrorCode ierr; 1435 PetscMPIInt rank = aij->rank,size = aij->size; 1436 PetscBool isdraw,iascii,isbinary; 1437 PetscViewer sviewer; 1438 PetscViewerFormat format; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1442 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1444 if (iascii) { 1445 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1446 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1447 MatInfo info; 1448 PetscBool inodes; 1449 1450 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1451 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1452 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 if (!inodes) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1456 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1457 } else { 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1459 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1460 } 1461 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1465 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1468 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1471 PetscInt inodecount,inodelimit,*inodes; 1472 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1473 if (inodes) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1475 } else { 1476 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1480 PetscFunctionReturn(0); 1481 } 1482 } else if (isbinary) { 1483 if (size == 1) { 1484 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (isdraw) { 1491 PetscDraw draw; 1492 PetscBool isnull; 1493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1495 } 1496 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 /* assemble the entire matrix onto first processor. */ 1502 Mat A; 1503 Mat_SeqAIJ *Aloc; 1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1505 MatScalar *a; 1506 1507 if (mat->rmap->N > 1024) { 1508 PetscBool flg = PETSC_FALSE; 1509 1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1511 if (!flg) { 1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1513 } 1514 } 1515 1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1517 if (!rank) { 1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1521 } 1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1527 1528 /* copy over the A part */ 1529 Aloc = (Mat_SeqAIJ*)aij->A->data; 1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 row = mat->rmap->rstart; 1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1533 for (i=0; i<m; i++) { 1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1536 } 1537 aj = Aloc->j; 1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1539 1540 /* copy over the B part */ 1541 Aloc = (Mat_SeqAIJ*)aij->B->data; 1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1545 ct = cols; 1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1547 for (i=0; i<m; i++) { 1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1550 } 1551 ierr = PetscFree(ct);CHKERRQ(ierr); 1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 Everyone has to call to draw the matrix since the graphics waits are 1556 synchronized across all processors that share the PetscDraw object 1557 */ 1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 if (!rank) { 1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1562 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatView_MPIAIJ" 1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1574 { 1575 PetscErrorCode ierr; 1576 PetscBool iascii,isdraw,issocket,isbinary; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1580 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_ROW_ORIENTED: 1797 a->roworiented = flg; 1798 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1799 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_NEW_DIAGONALS: 1802 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1803 break; 1804 case MAT_IGNORE_OFF_PROC_ENTRIES: 1805 a->donotstash = flg; 1806 break; 1807 case MAT_SPD: 1808 A->spd_set = PETSC_TRUE; 1809 A->spd = flg; 1810 if (flg) { 1811 A->symmetric = PETSC_TRUE; 1812 A->structurally_symmetric = PETSC_TRUE; 1813 A->symmetric_set = PETSC_TRUE; 1814 A->structurally_symmetric_set = PETSC_TRUE; 1815 } 1816 break; 1817 case MAT_SYMMETRIC: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_STRUCTURALLY_SYMMETRIC: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_HERMITIAN: 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_SYMMETRY_ETERNAL: 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 break; 1829 default: 1830 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1831 } 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatGetRow_MPIAIJ" 1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1838 { 1839 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1840 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1841 PetscErrorCode ierr; 1842 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1843 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1844 PetscInt *cmap,*idx_p; 1845 1846 PetscFunctionBegin; 1847 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1848 mat->getrowactive = PETSC_TRUE; 1849 1850 if (!mat->rowvalues && (idx || v)) { 1851 /* 1852 allocate enough space to hold information from the longest row. 1853 */ 1854 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1855 PetscInt max = 1,tmp; 1856 for (i=0; i<matin->rmap->n; i++) { 1857 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1858 if (max < tmp) { max = tmp; } 1859 } 1860 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1861 } 1862 1863 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1864 lrow = row - rstart; 1865 1866 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1867 if (!v) {pvA = 0; pvB = 0;} 1868 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1869 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1870 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1871 nztot = nzA + nzB; 1872 1873 cmap = mat->garray; 1874 if (v || idx) { 1875 if (nztot) { 1876 /* Sort by increasing column numbers, assuming A and B already sorted */ 1877 PetscInt imark = -1; 1878 if (v) { 1879 *v = v_p = mat->rowvalues; 1880 for (i=0; i<nzB; i++) { 1881 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1882 else break; 1883 } 1884 imark = i; 1885 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1886 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1887 } 1888 if (idx) { 1889 *idx = idx_p = mat->rowindices; 1890 if (imark > -1) { 1891 for (i=0; i<imark; i++) { 1892 idx_p[i] = cmap[cworkB[i]]; 1893 } 1894 } else { 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1897 else break; 1898 } 1899 imark = i; 1900 } 1901 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1902 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1903 } 1904 } else { 1905 if (idx) *idx = 0; 1906 if (v) *v = 0; 1907 } 1908 } 1909 *nz = nztot; 1910 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1911 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1917 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1918 { 1919 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1920 1921 PetscFunctionBegin; 1922 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1923 aij->getrowactive = PETSC_FALSE; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatNorm_MPIAIJ" 1929 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1930 { 1931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1932 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1933 PetscErrorCode ierr; 1934 PetscInt i,j,cstart = mat->cmap->rstart; 1935 PetscReal sum = 0.0; 1936 MatScalar *v; 1937 1938 PetscFunctionBegin; 1939 if (aij->size == 1) { 1940 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1941 } else { 1942 if (type == NORM_FROBENIUS) { 1943 v = amat->a; 1944 for (i=0; i<amat->nz; i++) { 1945 #if defined(PETSC_USE_COMPLEX) 1946 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1947 #else 1948 sum += (*v)*(*v); v++; 1949 #endif 1950 } 1951 v = bmat->a; 1952 for (i=0; i<bmat->nz; i++) { 1953 #if defined(PETSC_USE_COMPLEX) 1954 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1955 #else 1956 sum += (*v)*(*v); v++; 1957 #endif 1958 } 1959 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1960 *norm = PetscSqrtReal(*norm); 1961 } else if (type == NORM_1) { /* max column norm */ 1962 PetscReal *tmp,*tmp2; 1963 PetscInt *jj,*garray = aij->garray; 1964 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1965 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1966 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1967 *norm = 0.0; 1968 v = amat->a; jj = amat->j; 1969 for (j=0; j<amat->nz; j++) { 1970 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1971 } 1972 v = bmat->a; jj = bmat->j; 1973 for (j=0; j<bmat->nz; j++) { 1974 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1975 } 1976 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1977 for (j=0; j<mat->cmap->N; j++) { 1978 if (tmp2[j] > *norm) *norm = tmp2[j]; 1979 } 1980 ierr = PetscFree(tmp);CHKERRQ(ierr); 1981 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1982 } else if (type == NORM_INFINITY) { /* max row norm */ 1983 PetscReal ntemp = 0.0; 1984 for (j=0; j<aij->A->rmap->n; j++) { 1985 v = amat->a + amat->i[j]; 1986 sum = 0.0; 1987 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 v = bmat->a + bmat->i[j]; 1991 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 if (sum > ntemp) ntemp = sum; 1995 } 1996 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1997 } else { 1998 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1999 } 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatTranspose_MPIAIJ" 2006 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2007 { 2008 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2009 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2010 PetscErrorCode ierr; 2011 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2012 PetscInt cstart=A->cmap->rstart,ncol; 2013 Mat B; 2014 MatScalar *array; 2015 2016 PetscFunctionBegin; 2017 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2018 2019 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2020 ai = Aloc->i; aj = Aloc->j; 2021 bi = Bloc->i; bj = Bloc->j; 2022 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2023 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2024 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2025 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2026 for (i=0; i<ai[ma]; i++){ 2027 d_nnz[aj[i]] ++; 2028 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2029 } 2030 2031 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2032 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2033 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2034 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2035 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2036 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2037 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2038 } else { 2039 B = *matout; 2040 } 2041 2042 /* copy over the A part */ 2043 array = Aloc->a; 2044 row = A->rmap->rstart; 2045 for (i=0; i<ma; i++) { 2046 ncol = ai[i+1]-ai[i]; 2047 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2048 row++; array += ncol; aj += ncol; 2049 } 2050 aj = Aloc->j; 2051 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2052 2053 /* copy over the B part */ 2054 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2055 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2056 array = Bloc->a; 2057 row = A->rmap->rstart; 2058 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2059 cols_tmp = cols; 2060 for (i=0; i<mb; i++) { 2061 ncol = bi[i+1]-bi[i]; 2062 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2063 row++; array += ncol; cols_tmp += ncol; 2064 } 2065 ierr = PetscFree(cols);CHKERRQ(ierr); 2066 2067 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2070 *matout = B; 2071 } else { 2072 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2079 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2080 { 2081 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2082 Mat a = aij->A,b = aij->B; 2083 PetscErrorCode ierr; 2084 PetscInt s1,s2,s3; 2085 2086 PetscFunctionBegin; 2087 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2088 if (rr) { 2089 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2090 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2091 /* Overlap communication with computation. */ 2092 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 } 2094 if (ll) { 2095 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2096 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2097 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2098 } 2099 /* scale the diagonal block */ 2100 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2101 2102 if (rr) { 2103 /* Do a scatter end and then right scale the off-diagonal block */ 2104 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2106 } 2107 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2114 { 2115 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatEqual_MPIAIJ" 2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2126 { 2127 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2128 Mat a,b,c,d; 2129 PetscBool flg; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 a = matA->A; b = matA->B; 2134 c = matB->A; d = matB->B; 2135 2136 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2137 if (flg) { 2138 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2139 } 2140 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatCopy_MPIAIJ" 2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2147 { 2148 PetscErrorCode ierr; 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2151 2152 PetscFunctionBegin; 2153 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2154 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2155 /* because of the column compression in the off-processor part of the matrix a->B, 2156 the number of columns in a->B and b->B may be different, hence we cannot call 2157 the MatCopy() directly on the two parts. If need be, we can provide a more 2158 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2159 then copying the submatrices */ 2160 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2161 } else { 2162 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2163 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatSetUp_MPIAIJ" 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2183 { 2184 PetscInt i,m=Y->rmap->N; 2185 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2186 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2187 const PetscInt *xi = x->i,*yi = y->i; 2188 2189 PetscFunctionBegin; 2190 /* Set the number of nonzeros in the new matrix */ 2191 for(i=0; i<m; i++) { 2192 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2193 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2194 nnz[i] = 0; 2195 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2196 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2197 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2198 nnz[i]++; 2199 } 2200 for (; k<nzy; k++) nnz[i]++; 2201 } 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatAXPY_MPIAIJ" 2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2208 { 2209 PetscErrorCode ierr; 2210 PetscInt i; 2211 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2212 PetscBLASInt bnz,one=1; 2213 Mat_SeqAIJ *x,*y; 2214 2215 PetscFunctionBegin; 2216 if (str == SAME_NONZERO_PATTERN) { 2217 PetscScalar alpha = a; 2218 x = (Mat_SeqAIJ *)xx->A->data; 2219 y = (Mat_SeqAIJ *)yy->A->data; 2220 bnz = PetscBLASIntCast(x->nz); 2221 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2222 x = (Mat_SeqAIJ *)xx->B->data; 2223 y = (Mat_SeqAIJ *)yy->B->data; 2224 bnz = PetscBLASIntCast(x->nz); 2225 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2226 } else if (str == SUBSET_NONZERO_PATTERN) { 2227 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2228 2229 x = (Mat_SeqAIJ *)xx->B->data; 2230 y = (Mat_SeqAIJ *)yy->B->data; 2231 if (y->xtoy && y->XtoY != xx->B) { 2232 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2233 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2234 } 2235 if (!y->xtoy) { /* get xtoy */ 2236 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2237 y->XtoY = xx->B; 2238 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2239 } 2240 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2241 } else { 2242 Mat B; 2243 PetscInt *nnz_d,*nnz_o; 2244 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2245 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2246 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2247 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2248 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2249 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,B); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatConjugate_MPIAIJ" 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatRealPart_MPIAIJ" 2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2284 { 2285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2290 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2297 { 2298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2303 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #ifdef PETSC_HAVE_PBGL 2308 2309 #include <boost/parallel/mpi/bsp_process_group.hpp> 2310 #include <boost/graph/distributed/ilu_default_graph.hpp> 2311 #include <boost/graph/distributed/ilu_0_block.hpp> 2312 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2313 #include <boost/graph/distributed/petsc/interface.hpp> 2314 #include <boost/multi_array.hpp> 2315 #include <boost/parallel/distributed_property_map->hpp> 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2319 /* 2320 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2321 */ 2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2323 { 2324 namespace petsc = boost::distributed::petsc; 2325 2326 namespace graph_dist = boost::graph::distributed; 2327 using boost::graph::distributed::ilu_default::process_group_type; 2328 using boost::graph::ilu_permuted; 2329 2330 PetscBool row_identity, col_identity; 2331 PetscContainer c; 2332 PetscInt m, n, M, N; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2337 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2338 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2339 if (!row_identity || !col_identity) { 2340 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2341 } 2342 2343 process_group_type pg; 2344 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2345 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2346 lgraph_type& level_graph = *lgraph_p; 2347 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2348 2349 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2350 ilu_permuted(level_graph); 2351 2352 /* put together the new matrix */ 2353 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2354 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2355 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2356 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2357 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2358 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2359 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 2362 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2363 ierr = PetscContainerSetPointer(c, lgraph_p); 2364 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2365 ierr = PetscContainerDestroy(&c); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2372 { 2373 PetscFunctionBegin; 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatSolve_MPIAIJ" 2379 /* 2380 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2381 */ 2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2383 { 2384 namespace graph_dist = boost::graph::distributed; 2385 2386 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2387 lgraph_type* lgraph_p; 2388 PetscContainer c; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2393 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2394 ierr = VecCopy(b, x);CHKERRQ(ierr); 2395 2396 PetscScalar* array_x; 2397 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2398 PetscInt sx; 2399 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2400 2401 PetscScalar* array_b; 2402 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2403 PetscInt sb; 2404 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2405 2406 lgraph_type& level_graph = *lgraph_p; 2407 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2408 2409 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2410 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2411 ref_x(array_x, boost::extents[num_vertices(graph)]); 2412 2413 typedef boost::iterator_property_map<array_ref_type::iterator, 2414 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2415 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2416 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2417 2418 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2419 2420 PetscFunctionReturn(0); 2421 } 2422 #endif 2423 2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2425 PetscInt nzlocal,nsends,nrecvs; 2426 PetscMPIInt *send_rank,*recv_rank; 2427 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2428 PetscScalar *sbuf_a,**rbuf_a; 2429 PetscErrorCode (*Destroy)(Mat); 2430 } Mat_Redundant; 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2435 { 2436 PetscErrorCode ierr; 2437 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2438 PetscInt i; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2444 for (i=0; i<redund->nrecvs; i++){ 2445 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2446 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2447 } 2448 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2449 ierr = PetscFree(redund);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatDestroy_MatRedundant" 2455 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2456 { 2457 PetscErrorCode ierr; 2458 PetscContainer container; 2459 Mat_Redundant *redund=PETSC_NULL; 2460 2461 PetscFunctionBegin; 2462 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2463 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2464 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2465 A->ops->destroy = redund->Destroy; 2466 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2467 if (A->ops->destroy) { 2468 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2476 { 2477 PetscMPIInt rank,size; 2478 MPI_Comm comm=((PetscObject)mat)->comm; 2479 PetscErrorCode ierr; 2480 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2481 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2482 PetscInt *rowrange=mat->rmap->range; 2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2484 Mat A=aij->A,B=aij->B,C=*matredundant; 2485 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2486 PetscScalar *sbuf_a; 2487 PetscInt nzlocal=a->nz+b->nz; 2488 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2489 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2490 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2491 MatScalar *aworkA,*aworkB; 2492 PetscScalar *vals; 2493 PetscMPIInt tag1,tag2,tag3,imdex; 2494 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2495 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2496 MPI_Status recv_status,*send_status; 2497 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2498 PetscInt **rbuf_j=PETSC_NULL; 2499 PetscScalar **rbuf_a=PETSC_NULL; 2500 Mat_Redundant *redund=PETSC_NULL; 2501 PetscContainer container; 2502 2503 PetscFunctionBegin; 2504 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2506 2507 if (reuse == MAT_REUSE_MATRIX) { 2508 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2509 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2510 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2511 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2512 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2513 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2514 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2515 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2516 2517 nsends = redund->nsends; 2518 nrecvs = redund->nrecvs; 2519 send_rank = redund->send_rank; 2520 recv_rank = redund->recv_rank; 2521 sbuf_nz = redund->sbuf_nz; 2522 rbuf_nz = redund->rbuf_nz; 2523 sbuf_j = redund->sbuf_j; 2524 sbuf_a = redund->sbuf_a; 2525 rbuf_j = redund->rbuf_j; 2526 rbuf_a = redund->rbuf_a; 2527 } 2528 2529 if (reuse == MAT_INITIAL_MATRIX){ 2530 PetscMPIInt subrank,subsize; 2531 PetscInt nleftover,np_subcomm; 2532 /* get the destination processors' id send_rank, nsends and nrecvs */ 2533 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2534 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2535 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2536 np_subcomm = size/nsubcomm; 2537 nleftover = size - nsubcomm*np_subcomm; 2538 nsends = 0; nrecvs = 0; 2539 for (i=0; i<size; i++){ /* i=rank*/ 2540 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2541 send_rank[nsends] = i; nsends++; 2542 recv_rank[nrecvs++] = i; 2543 } 2544 } 2545 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2546 i = size-nleftover-1; 2547 j = 0; 2548 while (j < nsubcomm - nleftover){ 2549 send_rank[nsends++] = i; 2550 i--; j++; 2551 } 2552 } 2553 2554 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2555 for (i=0; i<nleftover; i++){ 2556 recv_rank[nrecvs++] = size-nleftover+i; 2557 } 2558 } 2559 2560 /* allocate sbuf_j, sbuf_a */ 2561 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2562 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2563 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2564 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2565 2566 /* copy mat's local entries into the buffers */ 2567 if (reuse == MAT_INITIAL_MATRIX){ 2568 rownz_max = 0; 2569 rptr = sbuf_j; 2570 cols = sbuf_j + rend-rstart + 1; 2571 vals = sbuf_a; 2572 rptr[0] = 0; 2573 for (i=0; i<rend-rstart; i++){ 2574 row = i + rstart; 2575 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2576 ncols = nzA + nzB; 2577 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2578 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2579 /* load the column indices for this row into cols */ 2580 lwrite = 0; 2581 for (l=0; l<nzB; l++) { 2582 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2583 vals[lwrite] = aworkB[l]; 2584 cols[lwrite++] = ctmp; 2585 } 2586 } 2587 for (l=0; l<nzA; l++){ 2588 vals[lwrite] = aworkA[l]; 2589 cols[lwrite++] = cstart + cworkA[l]; 2590 } 2591 for (l=0; l<nzB; l++) { 2592 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2593 vals[lwrite] = aworkB[l]; 2594 cols[lwrite++] = ctmp; 2595 } 2596 } 2597 vals += ncols; 2598 cols += ncols; 2599 rptr[i+1] = rptr[i] + ncols; 2600 if (rownz_max < ncols) rownz_max = ncols; 2601 } 2602 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2603 } else { /* only copy matrix values into sbuf_a */ 2604 rptr = sbuf_j; 2605 vals = sbuf_a; 2606 rptr[0] = 0; 2607 for (i=0; i<rend-rstart; i++){ 2608 row = i + rstart; 2609 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2610 ncols = nzA + nzB; 2611 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2612 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2613 lwrite = 0; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2616 } 2617 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2618 for (l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2620 } 2621 vals += ncols; 2622 rptr[i+1] = rptr[i] + ncols; 2623 } 2624 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2625 2626 /* send nzlocal to others, and recv other's nzlocal */ 2627 /*--------------------------------------------------*/ 2628 if (reuse == MAT_INITIAL_MATRIX){ 2629 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2630 s_waits2 = s_waits3 + nsends; 2631 s_waits1 = s_waits2 + nsends; 2632 r_waits1 = s_waits1 + nsends; 2633 r_waits2 = r_waits1 + nrecvs; 2634 r_waits3 = r_waits2 + nrecvs; 2635 } else { 2636 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2637 r_waits3 = s_waits3 + nsends; 2638 } 2639 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 /* get new tags to keep the communication clean */ 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2645 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2646 2647 /* post receives of other's nzlocal */ 2648 for (i=0; i<nrecvs; i++){ 2649 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2650 } 2651 /* send nzlocal to others */ 2652 for (i=0; i<nsends; i++){ 2653 sbuf_nz[i] = nzlocal; 2654 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2657 count = nrecvs; 2658 while (count) { 2659 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2660 recv_rank[imdex] = recv_status.MPI_SOURCE; 2661 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2662 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2663 2664 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2665 rbuf_nz[imdex] += i + 2; 2666 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2667 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2668 count--; 2669 } 2670 /* wait on sends of nzlocal */ 2671 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2672 /* send mat->i,j to others, and recv from other's */ 2673 /*------------------------------------------------*/ 2674 for (i=0; i<nsends; i++){ 2675 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2676 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2677 } 2678 /* wait on receives of mat->i,j */ 2679 /*------------------------------*/ 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 /* wait on sends of mat->i,j */ 2687 /*---------------------------*/ 2688 if (nsends) { 2689 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2690 } 2691 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2692 2693 /* post receives, send and receive mat->a */ 2694 /*----------------------------------------*/ 2695 for (imdex=0; imdex<nrecvs; imdex++) { 2696 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2697 } 2698 for (i=0; i<nsends; i++){ 2699 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2700 } 2701 count = nrecvs; 2702 while (count) { 2703 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2704 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2705 count--; 2706 } 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2709 } 2710 2711 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2712 2713 /* create redundant matrix */ 2714 /*-------------------------*/ 2715 if (reuse == MAT_INITIAL_MATRIX){ 2716 /* compute rownz_max for preallocation */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2719 rptr = rbuf_j[imdex]; 2720 for (i=0; i<j; i++){ 2721 ncols = rptr[i+1] - rptr[i]; 2722 if (rownz_max < ncols) rownz_max = ncols; 2723 } 2724 } 2725 2726 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2727 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2728 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2729 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2730 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2731 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2732 } else { 2733 C = *matredundant; 2734 } 2735 2736 /* insert local matrix entries */ 2737 rptr = sbuf_j; 2738 cols = sbuf_j + rend-rstart + 1; 2739 vals = sbuf_a; 2740 for (i=0; i<rend-rstart; i++){ 2741 row = i + rstart; 2742 ncols = rptr[i+1] - rptr[i]; 2743 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2744 vals += ncols; 2745 cols += ncols; 2746 } 2747 /* insert received matrix entries */ 2748 for (imdex=0; imdex<nrecvs; imdex++){ 2749 rstart = rowrange[recv_rank[imdex]]; 2750 rend = rowrange[recv_rank[imdex]+1]; 2751 rptr = rbuf_j[imdex]; 2752 cols = rbuf_j[imdex] + rend-rstart + 1; 2753 vals = rbuf_a[imdex]; 2754 for (i=0; i<rend-rstart; i++){ 2755 row = i + rstart; 2756 ncols = rptr[i+1] - rptr[i]; 2757 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2758 vals += ncols; 2759 cols += ncols; 2760 } 2761 } 2762 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2764 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2765 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2766 if (reuse == MAT_INITIAL_MATRIX) { 2767 PetscContainer container; 2768 *matredundant = C; 2769 /* create a supporting struct and attach it to C for reuse */ 2770 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2771 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2772 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2773 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2774 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2775 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2776 2777 redund->nzlocal = nzlocal; 2778 redund->nsends = nsends; 2779 redund->nrecvs = nrecvs; 2780 redund->send_rank = send_rank; 2781 redund->recv_rank = recv_rank; 2782 redund->sbuf_nz = sbuf_nz; 2783 redund->rbuf_nz = rbuf_nz; 2784 redund->sbuf_j = sbuf_j; 2785 redund->sbuf_a = sbuf_a; 2786 redund->rbuf_j = rbuf_j; 2787 redund->rbuf_a = rbuf_a; 2788 2789 redund->Destroy = C->ops->destroy; 2790 C->ops->destroy = MatDestroy_MatRedundant; 2791 } 2792 PetscFunctionReturn(0); 2793 } 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2797 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2798 { 2799 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2800 PetscErrorCode ierr; 2801 PetscInt i,*idxb = 0; 2802 PetscScalar *va,*vb; 2803 Vec vtmp; 2804 2805 PetscFunctionBegin; 2806 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2807 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2808 if (idx) { 2809 for (i=0; i<A->rmap->n; i++) { 2810 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2811 } 2812 } 2813 2814 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2815 if (idx) { 2816 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2817 } 2818 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2819 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2820 2821 for (i=0; i<A->rmap->n; i++){ 2822 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2823 va[i] = vb[i]; 2824 if (idx) idx[i] = a->garray[idxb[i]]; 2825 } 2826 } 2827 2828 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2829 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2830 ierr = PetscFree(idxb);CHKERRQ(ierr); 2831 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2837 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2838 { 2839 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2840 PetscErrorCode ierr; 2841 PetscInt i,*idxb = 0; 2842 PetscScalar *va,*vb; 2843 Vec vtmp; 2844 2845 PetscFunctionBegin; 2846 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2847 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2848 if (idx) { 2849 for (i=0; i<A->cmap->n; i++) { 2850 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2851 } 2852 } 2853 2854 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2855 if (idx) { 2856 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2857 } 2858 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2859 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2860 2861 for (i=0; i<A->rmap->n; i++){ 2862 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2863 va[i] = vb[i]; 2864 if (idx) idx[i] = a->garray[idxb[i]]; 2865 } 2866 } 2867 2868 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2870 ierr = PetscFree(idxb);CHKERRQ(ierr); 2871 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2872 PetscFunctionReturn(0); 2873 } 2874 2875 #undef __FUNCT__ 2876 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2877 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2878 { 2879 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2880 PetscInt n = A->rmap->n; 2881 PetscInt cstart = A->cmap->rstart; 2882 PetscInt *cmap = mat->garray; 2883 PetscInt *diagIdx, *offdiagIdx; 2884 Vec diagV, offdiagV; 2885 PetscScalar *a, *diagA, *offdiagA; 2886 PetscInt r; 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2891 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2892 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2893 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2894 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2895 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2896 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2897 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2898 for(r = 0; r < n; ++r) { 2899 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2900 a[r] = diagA[r]; 2901 idx[r] = cstart + diagIdx[r]; 2902 } else { 2903 a[r] = offdiagA[r]; 2904 idx[r] = cmap[offdiagIdx[r]]; 2905 } 2906 } 2907 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2908 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2909 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2910 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2911 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2912 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2913 PetscFunctionReturn(0); 2914 } 2915 2916 #undef __FUNCT__ 2917 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2918 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2919 { 2920 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2921 PetscInt n = A->rmap->n; 2922 PetscInt cstart = A->cmap->rstart; 2923 PetscInt *cmap = mat->garray; 2924 PetscInt *diagIdx, *offdiagIdx; 2925 Vec diagV, offdiagV; 2926 PetscScalar *a, *diagA, *offdiagA; 2927 PetscInt r; 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2932 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2933 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2934 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2935 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2936 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2937 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2938 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2939 for(r = 0; r < n; ++r) { 2940 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2941 a[r] = diagA[r]; 2942 idx[r] = cstart + diagIdx[r]; 2943 } else { 2944 a[r] = offdiagA[r]; 2945 idx[r] = cmap[offdiagIdx[r]]; 2946 } 2947 } 2948 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2950 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2951 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2952 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2953 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2959 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2960 { 2961 PetscErrorCode ierr; 2962 Mat *dummy; 2963 2964 PetscFunctionBegin; 2965 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2966 *newmat = *dummy; 2967 ierr = PetscFree(dummy);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2975 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2976 { 2977 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2978 PetscErrorCode ierr; 2979 2980 PetscFunctionBegin; 2981 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2982 PetscFunctionReturn(0); 2983 } 2984 2985 2986 /* -------------------------------------------------------------------*/ 2987 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2988 MatGetRow_MPIAIJ, 2989 MatRestoreRow_MPIAIJ, 2990 MatMult_MPIAIJ, 2991 /* 4*/ MatMultAdd_MPIAIJ, 2992 MatMultTranspose_MPIAIJ, 2993 MatMultTransposeAdd_MPIAIJ, 2994 #ifdef PETSC_HAVE_PBGL 2995 MatSolve_MPIAIJ, 2996 #else 2997 0, 2998 #endif 2999 0, 3000 0, 3001 /*10*/ 0, 3002 0, 3003 0, 3004 MatSOR_MPIAIJ, 3005 MatTranspose_MPIAIJ, 3006 /*15*/ MatGetInfo_MPIAIJ, 3007 MatEqual_MPIAIJ, 3008 MatGetDiagonal_MPIAIJ, 3009 MatDiagonalScale_MPIAIJ, 3010 MatNorm_MPIAIJ, 3011 /*20*/ MatAssemblyBegin_MPIAIJ, 3012 MatAssemblyEnd_MPIAIJ, 3013 MatSetOption_MPIAIJ, 3014 MatZeroEntries_MPIAIJ, 3015 /*24*/ MatZeroRows_MPIAIJ, 3016 0, 3017 #ifdef PETSC_HAVE_PBGL 3018 0, 3019 #else 3020 0, 3021 #endif 3022 0, 3023 0, 3024 /*29*/ MatSetUp_MPIAIJ, 3025 #ifdef PETSC_HAVE_PBGL 3026 0, 3027 #else 3028 0, 3029 #endif 3030 0, 3031 0, 3032 0, 3033 /*34*/ MatDuplicate_MPIAIJ, 3034 0, 3035 0, 3036 0, 3037 0, 3038 /*39*/ MatAXPY_MPIAIJ, 3039 MatGetSubMatrices_MPIAIJ, 3040 MatIncreaseOverlap_MPIAIJ, 3041 MatGetValues_MPIAIJ, 3042 MatCopy_MPIAIJ, 3043 /*44*/ MatGetRowMax_MPIAIJ, 3044 MatScale_MPIAIJ, 3045 0, 3046 0, 3047 MatZeroRowsColumns_MPIAIJ, 3048 /*49*/ 0, 3049 0, 3050 0, 3051 0, 3052 0, 3053 /*54*/ MatFDColoringCreate_MPIAIJ, 3054 0, 3055 MatSetUnfactored_MPIAIJ, 3056 MatPermute_MPIAIJ, 3057 0, 3058 /*59*/ MatGetSubMatrix_MPIAIJ, 3059 MatDestroy_MPIAIJ, 3060 MatView_MPIAIJ, 3061 0, 3062 0, 3063 /*64*/ 0, 3064 0, 3065 0, 3066 0, 3067 0, 3068 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3069 MatGetRowMinAbs_MPIAIJ, 3070 0, 3071 MatSetColoring_MPIAIJ, 3072 #if defined(PETSC_HAVE_ADIC) 3073 MatSetValuesAdic_MPIAIJ, 3074 #else 3075 0, 3076 #endif 3077 MatSetValuesAdifor_MPIAIJ, 3078 /*75*/ MatFDColoringApply_AIJ, 3079 0, 3080 0, 3081 0, 3082 0, 3083 /*80*/ 0, 3084 0, 3085 0, 3086 /*83*/ MatLoad_MPIAIJ, 3087 0, 3088 0, 3089 0, 3090 0, 3091 0, 3092 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3093 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3094 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3095 MatPtAP_Basic, 3096 MatPtAPSymbolic_MPIAIJ, 3097 /*94*/ MatPtAPNumeric_MPIAIJ, 3098 0, 3099 0, 3100 0, 3101 0, 3102 /*99*/ 0, 3103 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3104 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3105 MatConjugate_MPIAIJ, 3106 0, 3107 /*104*/MatSetValuesRow_MPIAIJ, 3108 MatRealPart_MPIAIJ, 3109 MatImaginaryPart_MPIAIJ, 3110 0, 3111 0, 3112 /*109*/0, 3113 MatGetRedundantMatrix_MPIAIJ, 3114 MatGetRowMin_MPIAIJ, 3115 0, 3116 0, 3117 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3118 0, 3119 0, 3120 0, 3121 0, 3122 /*119*/0, 3123 0, 3124 0, 3125 0, 3126 MatGetMultiProcBlock_MPIAIJ, 3127 /*124*/MatFindNonZeroRows_MPIAIJ, 3128 MatGetColumnNorms_MPIAIJ, 3129 MatInvertBlockDiagonal_MPIAIJ, 3130 0, 3131 MatGetSubMatricesParallel_MPIAIJ, 3132 /*129*/0, 3133 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3134 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3135 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3136 0, 3137 /*134*/0, 3138 0, 3139 0, 3140 0, 3141 0 3142 }; 3143 3144 /* ----------------------------------------------------------------------------------------*/ 3145 3146 EXTERN_C_BEGIN 3147 #undef __FUNCT__ 3148 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3149 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3150 { 3151 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3152 PetscErrorCode ierr; 3153 3154 PetscFunctionBegin; 3155 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3156 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 EXTERN_C_END 3160 3161 EXTERN_C_BEGIN 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3164 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3165 { 3166 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3167 PetscErrorCode ierr; 3168 3169 PetscFunctionBegin; 3170 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3171 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 EXTERN_C_END 3175 3176 EXTERN_C_BEGIN 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3179 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3180 { 3181 Mat_MPIAIJ *b; 3182 PetscErrorCode ierr; 3183 PetscInt i; 3184 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3185 3186 PetscFunctionBegin; 3187 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3188 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3189 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3190 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3191 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3192 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3193 3194 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3195 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3196 if (d_nnz) { 3197 for (i=0; i<B->rmap->n; i++) { 3198 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3199 } 3200 } 3201 if (o_nnz) { 3202 for (i=0; i<B->rmap->n; i++) { 3203 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3204 } 3205 } 3206 b = (Mat_MPIAIJ*)B->data; 3207 3208 if (!B->preallocated) { 3209 /* Explicitly create 2 MATSEQAIJ matrices. */ 3210 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3211 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3212 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3213 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3214 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3215 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3216 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3217 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3218 } 3219 3220 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3221 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3222 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3223 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3224 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3225 B->preallocated = PETSC_TRUE; 3226 PetscFunctionReturn(0); 3227 } 3228 EXTERN_C_END 3229 3230 #undef __FUNCT__ 3231 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3232 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3233 { 3234 Mat mat; 3235 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 *newmat = 0; 3240 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3241 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3242 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3243 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3244 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3245 a = (Mat_MPIAIJ*)mat->data; 3246 3247 mat->factortype = matin->factortype; 3248 mat->rmap->bs = matin->rmap->bs; 3249 mat->cmap->bs = matin->cmap->bs; 3250 mat->assembled = PETSC_TRUE; 3251 mat->insertmode = NOT_SET_VALUES; 3252 mat->preallocated = PETSC_TRUE; 3253 3254 a->size = oldmat->size; 3255 a->rank = oldmat->rank; 3256 a->donotstash = oldmat->donotstash; 3257 a->roworiented = oldmat->roworiented; 3258 a->rowindices = 0; 3259 a->rowvalues = 0; 3260 a->getrowactive = PETSC_FALSE; 3261 3262 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3263 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3264 3265 if (oldmat->colmap) { 3266 #if defined (PETSC_USE_CTABLE) 3267 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3268 #else 3269 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3270 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3271 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3272 #endif 3273 } else a->colmap = 0; 3274 if (oldmat->garray) { 3275 PetscInt len; 3276 len = oldmat->B->cmap->n; 3277 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3278 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3279 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3280 } else a->garray = 0; 3281 3282 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3283 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3284 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3285 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3286 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3287 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3288 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3289 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3290 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3291 *newmat = mat; 3292 PetscFunctionReturn(0); 3293 } 3294 3295 3296 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "MatLoad_MPIAIJ" 3299 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3300 { 3301 PetscScalar *vals,*svals; 3302 MPI_Comm comm = ((PetscObject)viewer)->comm; 3303 PetscErrorCode ierr; 3304 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3305 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3306 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3307 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3308 PetscInt cend,cstart,n,*rowners,sizesset=1; 3309 int fd; 3310 3311 PetscFunctionBegin; 3312 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3313 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3314 if (!rank) { 3315 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3316 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3317 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3318 } 3319 3320 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3321 3322 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3323 M = header[1]; N = header[2]; 3324 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3325 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3326 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3327 3328 /* If global sizes are set, check if they are consistent with that given in the file */ 3329 if (sizesset) { 3330 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3331 } 3332 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3333 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3334 3335 /* determine ownership of all rows */ 3336 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3337 else m = newMat->rmap->n; /* Set by user */ 3338 3339 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3340 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3341 3342 /* First process needs enough room for process with most rows */ 3343 if (!rank) { 3344 mmax = rowners[1]; 3345 for (i=2; i<size; i++) { 3346 mmax = PetscMax(mmax,rowners[i]); 3347 } 3348 } else mmax = m; 3349 3350 rowners[0] = 0; 3351 for (i=2; i<=size; i++) { 3352 rowners[i] += rowners[i-1]; 3353 } 3354 rstart = rowners[rank]; 3355 rend = rowners[rank+1]; 3356 3357 /* distribute row lengths to all processors */ 3358 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3359 if (!rank) { 3360 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3361 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3362 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3363 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3364 for (j=0; j<m; j++) { 3365 procsnz[0] += ourlens[j]; 3366 } 3367 for (i=1; i<size; i++) { 3368 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3369 /* calculate the number of nonzeros on each processor */ 3370 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3371 procsnz[i] += rowlengths[j]; 3372 } 3373 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3374 } 3375 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3376 } else { 3377 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3378 } 3379 3380 if (!rank) { 3381 /* determine max buffer needed and allocate it */ 3382 maxnz = 0; 3383 for (i=0; i<size; i++) { 3384 maxnz = PetscMax(maxnz,procsnz[i]); 3385 } 3386 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3387 3388 /* read in my part of the matrix column indices */ 3389 nz = procsnz[0]; 3390 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3391 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3392 3393 /* read in every one elses and ship off */ 3394 for (i=1; i<size; i++) { 3395 nz = procsnz[i]; 3396 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3397 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3398 } 3399 ierr = PetscFree(cols);CHKERRQ(ierr); 3400 } else { 3401 /* determine buffer space needed for message */ 3402 nz = 0; 3403 for (i=0; i<m; i++) { 3404 nz += ourlens[i]; 3405 } 3406 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3407 3408 /* receive message of column indices*/ 3409 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3410 } 3411 3412 /* determine column ownership if matrix is not square */ 3413 if (N != M) { 3414 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3415 else n = newMat->cmap->n; 3416 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3417 cstart = cend - n; 3418 } else { 3419 cstart = rstart; 3420 cend = rend; 3421 n = cend - cstart; 3422 } 3423 3424 /* loop over local rows, determining number of off diagonal entries */ 3425 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3426 jj = 0; 3427 for (i=0; i<m; i++) { 3428 for (j=0; j<ourlens[i]; j++) { 3429 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3430 jj++; 3431 } 3432 } 3433 3434 for (i=0; i<m; i++) { 3435 ourlens[i] -= offlens[i]; 3436 } 3437 if (!sizesset) { 3438 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3439 } 3440 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3441 3442 for (i=0; i<m; i++) { 3443 ourlens[i] += offlens[i]; 3444 } 3445 3446 if (!rank) { 3447 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3448 3449 /* read in my part of the matrix numerical values */ 3450 nz = procsnz[0]; 3451 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3452 3453 /* insert into matrix */ 3454 jj = rstart; 3455 smycols = mycols; 3456 svals = vals; 3457 for (i=0; i<m; i++) { 3458 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3459 smycols += ourlens[i]; 3460 svals += ourlens[i]; 3461 jj++; 3462 } 3463 3464 /* read in other processors and ship out */ 3465 for (i=1; i<size; i++) { 3466 nz = procsnz[i]; 3467 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3468 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3469 } 3470 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3471 } else { 3472 /* receive numeric values */ 3473 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3474 3475 /* receive message of values*/ 3476 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3477 3478 /* insert into matrix */ 3479 jj = rstart; 3480 smycols = mycols; 3481 svals = vals; 3482 for (i=0; i<m; i++) { 3483 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3484 smycols += ourlens[i]; 3485 svals += ourlens[i]; 3486 jj++; 3487 } 3488 } 3489 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3490 ierr = PetscFree(vals);CHKERRQ(ierr); 3491 ierr = PetscFree(mycols);CHKERRQ(ierr); 3492 ierr = PetscFree(rowners);CHKERRQ(ierr); 3493 3494 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3495 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3496 PetscFunctionReturn(0); 3497 } 3498 3499 #undef __FUNCT__ 3500 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3501 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3502 { 3503 PetscErrorCode ierr; 3504 IS iscol_local; 3505 PetscInt csize; 3506 3507 PetscFunctionBegin; 3508 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3509 if (call == MAT_REUSE_MATRIX) { 3510 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3511 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3512 } else { 3513 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3514 } 3515 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3516 if (call == MAT_INITIAL_MATRIX) { 3517 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3518 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3519 } 3520 PetscFunctionReturn(0); 3521 } 3522 3523 #undef __FUNCT__ 3524 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3525 /* 3526 Not great since it makes two copies of the submatrix, first an SeqAIJ 3527 in local and then by concatenating the local matrices the end result. 3528 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3529 3530 Note: This requires a sequential iscol with all indices. 3531 */ 3532 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3533 { 3534 PetscErrorCode ierr; 3535 PetscMPIInt rank,size; 3536 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3537 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3538 Mat *local,M,Mreuse; 3539 MatScalar *vwork,*aa; 3540 MPI_Comm comm = ((PetscObject)mat)->comm; 3541 Mat_SeqAIJ *aij; 3542 3543 3544 PetscFunctionBegin; 3545 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3546 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3547 3548 if (call == MAT_REUSE_MATRIX) { 3549 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3550 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3551 local = &Mreuse; 3552 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3553 } else { 3554 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3555 Mreuse = *local; 3556 ierr = PetscFree(local);CHKERRQ(ierr); 3557 } 3558 3559 /* 3560 m - number of local rows 3561 n - number of columns (same on all processors) 3562 rstart - first row in new global matrix generated 3563 */ 3564 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3565 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3566 if (call == MAT_INITIAL_MATRIX) { 3567 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3568 ii = aij->i; 3569 jj = aij->j; 3570 3571 /* 3572 Determine the number of non-zeros in the diagonal and off-diagonal 3573 portions of the matrix in order to do correct preallocation 3574 */ 3575 3576 /* first get start and end of "diagonal" columns */ 3577 if (csize == PETSC_DECIDE) { 3578 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3579 if (mglobal == n) { /* square matrix */ 3580 nlocal = m; 3581 } else { 3582 nlocal = n/size + ((n % size) > rank); 3583 } 3584 } else { 3585 nlocal = csize; 3586 } 3587 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3588 rstart = rend - nlocal; 3589 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3590 3591 /* next, compute all the lengths */ 3592 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3593 olens = dlens + m; 3594 for (i=0; i<m; i++) { 3595 jend = ii[i+1] - ii[i]; 3596 olen = 0; 3597 dlen = 0; 3598 for (j=0; j<jend; j++) { 3599 if (*jj < rstart || *jj >= rend) olen++; 3600 else dlen++; 3601 jj++; 3602 } 3603 olens[i] = olen; 3604 dlens[i] = dlen; 3605 } 3606 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3607 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3608 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3609 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3610 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3611 ierr = PetscFree(dlens);CHKERRQ(ierr); 3612 } else { 3613 PetscInt ml,nl; 3614 3615 M = *newmat; 3616 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3617 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3618 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3619 /* 3620 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3621 rather than the slower MatSetValues(). 3622 */ 3623 M->was_assembled = PETSC_TRUE; 3624 M->assembled = PETSC_FALSE; 3625 } 3626 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3627 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3628 ii = aij->i; 3629 jj = aij->j; 3630 aa = aij->a; 3631 for (i=0; i<m; i++) { 3632 row = rstart + i; 3633 nz = ii[i+1] - ii[i]; 3634 cwork = jj; jj += nz; 3635 vwork = aa; aa += nz; 3636 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3637 } 3638 3639 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3640 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3641 *newmat = M; 3642 3643 /* save submatrix used in processor for next request */ 3644 if (call == MAT_INITIAL_MATRIX) { 3645 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3646 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3647 } 3648 3649 PetscFunctionReturn(0); 3650 } 3651 3652 EXTERN_C_BEGIN 3653 #undef __FUNCT__ 3654 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3655 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3656 { 3657 PetscInt m,cstart, cend,j,nnz,i,d; 3658 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3659 const PetscInt *JJ; 3660 PetscScalar *values; 3661 PetscErrorCode ierr; 3662 3663 PetscFunctionBegin; 3664 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3665 3666 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3667 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3668 m = B->rmap->n; 3669 cstart = B->cmap->rstart; 3670 cend = B->cmap->rend; 3671 rstart = B->rmap->rstart; 3672 3673 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3674 3675 #if defined(PETSC_USE_DEBUGGING) 3676 for (i=0; i<m; i++) { 3677 nnz = Ii[i+1]- Ii[i]; 3678 JJ = J + Ii[i]; 3679 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3680 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3681 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3682 } 3683 #endif 3684 3685 for (i=0; i<m; i++) { 3686 nnz = Ii[i+1]- Ii[i]; 3687 JJ = J + Ii[i]; 3688 nnz_max = PetscMax(nnz_max,nnz); 3689 d = 0; 3690 for (j=0; j<nnz; j++) { 3691 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3692 } 3693 d_nnz[i] = d; 3694 o_nnz[i] = nnz - d; 3695 } 3696 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3697 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3698 3699 if (v) values = (PetscScalar*)v; 3700 else { 3701 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3702 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3703 } 3704 3705 for (i=0; i<m; i++) { 3706 ii = i + rstart; 3707 nnz = Ii[i+1]- Ii[i]; 3708 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3709 } 3710 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 3713 if (!v) { 3714 ierr = PetscFree(values);CHKERRQ(ierr); 3715 } 3716 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3717 PetscFunctionReturn(0); 3718 } 3719 EXTERN_C_END 3720 3721 #undef __FUNCT__ 3722 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3723 /*@ 3724 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3725 (the default parallel PETSc format). 3726 3727 Collective on MPI_Comm 3728 3729 Input Parameters: 3730 + B - the matrix 3731 . i - the indices into j for the start of each local row (starts with zero) 3732 . j - the column indices for each local row (starts with zero) 3733 - v - optional values in the matrix 3734 3735 Level: developer 3736 3737 Notes: 3738 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3739 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3740 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3741 3742 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3743 3744 The format which is used for the sparse matrix input, is equivalent to a 3745 row-major ordering.. i.e for the following matrix, the input data expected is 3746 as shown: 3747 3748 1 0 0 3749 2 0 3 P0 3750 ------- 3751 4 5 6 P1 3752 3753 Process0 [P0]: rows_owned=[0,1] 3754 i = {0,1,3} [size = nrow+1 = 2+1] 3755 j = {0,0,2} [size = nz = 6] 3756 v = {1,2,3} [size = nz = 6] 3757 3758 Process1 [P1]: rows_owned=[2] 3759 i = {0,3} [size = nrow+1 = 1+1] 3760 j = {0,1,2} [size = nz = 6] 3761 v = {4,5,6} [size = nz = 6] 3762 3763 .keywords: matrix, aij, compressed row, sparse, parallel 3764 3765 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3766 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3767 @*/ 3768 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3769 { 3770 PetscErrorCode ierr; 3771 3772 PetscFunctionBegin; 3773 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3774 PetscFunctionReturn(0); 3775 } 3776 3777 #undef __FUNCT__ 3778 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3779 /*@C 3780 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3781 (the default parallel PETSc format). For good matrix assembly performance 3782 the user should preallocate the matrix storage by setting the parameters 3783 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3784 performance can be increased by more than a factor of 50. 3785 3786 Collective on MPI_Comm 3787 3788 Input Parameters: 3789 + A - the matrix 3790 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3791 (same value is used for all local rows) 3792 . d_nnz - array containing the number of nonzeros in the various rows of the 3793 DIAGONAL portion of the local submatrix (possibly different for each row) 3794 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3795 The size of this array is equal to the number of local rows, i.e 'm'. 3796 For matrices that will be factored, you must leave room for (and set) 3797 the diagonal entry even if it is zero. 3798 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3799 submatrix (same value is used for all local rows). 3800 - o_nnz - array containing the number of nonzeros in the various rows of the 3801 OFF-DIAGONAL portion of the local submatrix (possibly different for 3802 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3803 structure. The size of this array is equal to the number 3804 of local rows, i.e 'm'. 3805 3806 If the *_nnz parameter is given then the *_nz parameter is ignored 3807 3808 The AIJ format (also called the Yale sparse matrix format or 3809 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3810 storage. The stored row and column indices begin with zero. 3811 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3812 3813 The parallel matrix is partitioned such that the first m0 rows belong to 3814 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3815 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3816 3817 The DIAGONAL portion of the local submatrix of a processor can be defined 3818 as the submatrix which is obtained by extraction the part corresponding to 3819 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3820 first row that belongs to the processor, r2 is the last row belonging to 3821 the this processor, and c1-c2 is range of indices of the local part of a 3822 vector suitable for applying the matrix to. This is an mxn matrix. In the 3823 common case of a square matrix, the row and column ranges are the same and 3824 the DIAGONAL part is also square. The remaining portion of the local 3825 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3826 3827 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3828 3829 You can call MatGetInfo() to get information on how effective the preallocation was; 3830 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3831 You can also run with the option -info and look for messages with the string 3832 malloc in them to see if additional memory allocation was needed. 3833 3834 Example usage: 3835 3836 Consider the following 8x8 matrix with 34 non-zero values, that is 3837 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3838 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3839 as follows: 3840 3841 .vb 3842 1 2 0 | 0 3 0 | 0 4 3843 Proc0 0 5 6 | 7 0 0 | 8 0 3844 9 0 10 | 11 0 0 | 12 0 3845 ------------------------------------- 3846 13 0 14 | 15 16 17 | 0 0 3847 Proc1 0 18 0 | 19 20 21 | 0 0 3848 0 0 0 | 22 23 0 | 24 0 3849 ------------------------------------- 3850 Proc2 25 26 27 | 0 0 28 | 29 0 3851 30 0 0 | 31 32 33 | 0 34 3852 .ve 3853 3854 This can be represented as a collection of submatrices as: 3855 3856 .vb 3857 A B C 3858 D E F 3859 G H I 3860 .ve 3861 3862 Where the submatrices A,B,C are owned by proc0, D,E,F are 3863 owned by proc1, G,H,I are owned by proc2. 3864 3865 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3866 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3867 The 'M','N' parameters are 8,8, and have the same values on all procs. 3868 3869 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3870 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3871 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3872 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3873 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3874 matrix, ans [DF] as another SeqAIJ matrix. 3875 3876 When d_nz, o_nz parameters are specified, d_nz storage elements are 3877 allocated for every row of the local diagonal submatrix, and o_nz 3878 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3879 One way to choose d_nz and o_nz is to use the max nonzerors per local 3880 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3881 In this case, the values of d_nz,o_nz are: 3882 .vb 3883 proc0 : dnz = 2, o_nz = 2 3884 proc1 : dnz = 3, o_nz = 2 3885 proc2 : dnz = 1, o_nz = 4 3886 .ve 3887 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3888 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3889 for proc3. i.e we are using 12+15+10=37 storage locations to store 3890 34 values. 3891 3892 When d_nnz, o_nnz parameters are specified, the storage is specified 3893 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3894 In the above case the values for d_nnz,o_nnz are: 3895 .vb 3896 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3897 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3898 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3899 .ve 3900 Here the space allocated is sum of all the above values i.e 34, and 3901 hence pre-allocation is perfect. 3902 3903 Level: intermediate 3904 3905 .keywords: matrix, aij, compressed row, sparse, parallel 3906 3907 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3908 MPIAIJ, MatGetInfo() 3909 @*/ 3910 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3911 { 3912 PetscErrorCode ierr; 3913 3914 PetscFunctionBegin; 3915 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3916 PetscValidType(B,1); 3917 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3918 PetscFunctionReturn(0); 3919 } 3920 3921 #undef __FUNCT__ 3922 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3923 /*@ 3924 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3925 CSR format the local rows. 3926 3927 Collective on MPI_Comm 3928 3929 Input Parameters: 3930 + comm - MPI communicator 3931 . m - number of local rows (Cannot be PETSC_DECIDE) 3932 . n - This value should be the same as the local size used in creating the 3933 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3934 calculated if N is given) For square matrices n is almost always m. 3935 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3936 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3937 . i - row indices 3938 . j - column indices 3939 - a - matrix values 3940 3941 Output Parameter: 3942 . mat - the matrix 3943 3944 Level: intermediate 3945 3946 Notes: 3947 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3948 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3949 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3950 3951 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3952 3953 The format which is used for the sparse matrix input, is equivalent to a 3954 row-major ordering.. i.e for the following matrix, the input data expected is 3955 as shown: 3956 3957 1 0 0 3958 2 0 3 P0 3959 ------- 3960 4 5 6 P1 3961 3962 Process0 [P0]: rows_owned=[0,1] 3963 i = {0,1,3} [size = nrow+1 = 2+1] 3964 j = {0,0,2} [size = nz = 6] 3965 v = {1,2,3} [size = nz = 6] 3966 3967 Process1 [P1]: rows_owned=[2] 3968 i = {0,3} [size = nrow+1 = 1+1] 3969 j = {0,1,2} [size = nz = 6] 3970 v = {4,5,6} [size = nz = 6] 3971 3972 .keywords: matrix, aij, compressed row, sparse, parallel 3973 3974 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3975 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3976 @*/ 3977 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3978 { 3979 PetscErrorCode ierr; 3980 3981 PetscFunctionBegin; 3982 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3983 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3984 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3985 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3986 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 3987 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3988 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3989 PetscFunctionReturn(0); 3990 } 3991 3992 #undef __FUNCT__ 3993 #define __FUNCT__ "MatCreateAIJ" 3994 /*@C 3995 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3996 (the default parallel PETSc format). For good matrix assembly performance 3997 the user should preallocate the matrix storage by setting the parameters 3998 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3999 performance can be increased by more than a factor of 50. 4000 4001 Collective on MPI_Comm 4002 4003 Input Parameters: 4004 + comm - MPI communicator 4005 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4006 This value should be the same as the local size used in creating the 4007 y vector for the matrix-vector product y = Ax. 4008 . n - This value should be the same as the local size used in creating the 4009 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4010 calculated if N is given) For square matrices n is almost always m. 4011 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4012 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4013 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4014 (same value is used for all local rows) 4015 . d_nnz - array containing the number of nonzeros in the various rows of the 4016 DIAGONAL portion of the local submatrix (possibly different for each row) 4017 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4018 The size of this array is equal to the number of local rows, i.e 'm'. 4019 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4020 submatrix (same value is used for all local rows). 4021 - o_nnz - array containing the number of nonzeros in the various rows of the 4022 OFF-DIAGONAL portion of the local submatrix (possibly different for 4023 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4024 structure. The size of this array is equal to the number 4025 of local rows, i.e 'm'. 4026 4027 Output Parameter: 4028 . A - the matrix 4029 4030 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4031 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4032 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4033 4034 Notes: 4035 If the *_nnz parameter is given then the *_nz parameter is ignored 4036 4037 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4038 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4039 storage requirements for this matrix. 4040 4041 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4042 processor than it must be used on all processors that share the object for 4043 that argument. 4044 4045 The user MUST specify either the local or global matrix dimensions 4046 (possibly both). 4047 4048 The parallel matrix is partitioned across processors such that the 4049 first m0 rows belong to process 0, the next m1 rows belong to 4050 process 1, the next m2 rows belong to process 2 etc.. where 4051 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4052 values corresponding to [m x N] submatrix. 4053 4054 The columns are logically partitioned with the n0 columns belonging 4055 to 0th partition, the next n1 columns belonging to the next 4056 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4057 4058 The DIAGONAL portion of the local submatrix on any given processor 4059 is the submatrix corresponding to the rows and columns m,n 4060 corresponding to the given processor. i.e diagonal matrix on 4061 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4062 etc. The remaining portion of the local submatrix [m x (N-n)] 4063 constitute the OFF-DIAGONAL portion. The example below better 4064 illustrates this concept. 4065 4066 For a square global matrix we define each processor's diagonal portion 4067 to be its local rows and the corresponding columns (a square submatrix); 4068 each processor's off-diagonal portion encompasses the remainder of the 4069 local matrix (a rectangular submatrix). 4070 4071 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4072 4073 When calling this routine with a single process communicator, a matrix of 4074 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4075 type of communicator, use the construction mechanism: 4076 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4077 4078 By default, this format uses inodes (identical nodes) when possible. 4079 We search for consecutive rows with the same nonzero structure, thereby 4080 reusing matrix information to achieve increased efficiency. 4081 4082 Options Database Keys: 4083 + -mat_no_inode - Do not use inodes 4084 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4085 - -mat_aij_oneindex - Internally use indexing starting at 1 4086 rather than 0. Note that when calling MatSetValues(), 4087 the user still MUST index entries starting at 0! 4088 4089 4090 Example usage: 4091 4092 Consider the following 8x8 matrix with 34 non-zero values, that is 4093 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4094 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4095 as follows: 4096 4097 .vb 4098 1 2 0 | 0 3 0 | 0 4 4099 Proc0 0 5 6 | 7 0 0 | 8 0 4100 9 0 10 | 11 0 0 | 12 0 4101 ------------------------------------- 4102 13 0 14 | 15 16 17 | 0 0 4103 Proc1 0 18 0 | 19 20 21 | 0 0 4104 0 0 0 | 22 23 0 | 24 0 4105 ------------------------------------- 4106 Proc2 25 26 27 | 0 0 28 | 29 0 4107 30 0 0 | 31 32 33 | 0 34 4108 .ve 4109 4110 This can be represented as a collection of submatrices as: 4111 4112 .vb 4113 A B C 4114 D E F 4115 G H I 4116 .ve 4117 4118 Where the submatrices A,B,C are owned by proc0, D,E,F are 4119 owned by proc1, G,H,I are owned by proc2. 4120 4121 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4122 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4123 The 'M','N' parameters are 8,8, and have the same values on all procs. 4124 4125 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4126 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4127 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4128 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4129 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4130 matrix, ans [DF] as another SeqAIJ matrix. 4131 4132 When d_nz, o_nz parameters are specified, d_nz storage elements are 4133 allocated for every row of the local diagonal submatrix, and o_nz 4134 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4135 One way to choose d_nz and o_nz is to use the max nonzerors per local 4136 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4137 In this case, the values of d_nz,o_nz are: 4138 .vb 4139 proc0 : dnz = 2, o_nz = 2 4140 proc1 : dnz = 3, o_nz = 2 4141 proc2 : dnz = 1, o_nz = 4 4142 .ve 4143 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4144 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4145 for proc3. i.e we are using 12+15+10=37 storage locations to store 4146 34 values. 4147 4148 When d_nnz, o_nnz parameters are specified, the storage is specified 4149 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4150 In the above case the values for d_nnz,o_nnz are: 4151 .vb 4152 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4153 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4154 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4155 .ve 4156 Here the space allocated is sum of all the above values i.e 34, and 4157 hence pre-allocation is perfect. 4158 4159 Level: intermediate 4160 4161 .keywords: matrix, aij, compressed row, sparse, parallel 4162 4163 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4164 MPIAIJ, MatCreateMPIAIJWithArrays() 4165 @*/ 4166 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4167 { 4168 PetscErrorCode ierr; 4169 PetscMPIInt size; 4170 4171 PetscFunctionBegin; 4172 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4173 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4175 if (size > 1) { 4176 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4177 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4178 } else { 4179 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4180 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4181 } 4182 PetscFunctionReturn(0); 4183 } 4184 4185 #undef __FUNCT__ 4186 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4187 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4188 { 4189 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4190 4191 PetscFunctionBegin; 4192 *Ad = a->A; 4193 *Ao = a->B; 4194 *colmap = a->garray; 4195 PetscFunctionReturn(0); 4196 } 4197 4198 #undef __FUNCT__ 4199 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4200 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4201 { 4202 PetscErrorCode ierr; 4203 PetscInt i; 4204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4205 4206 PetscFunctionBegin; 4207 if (coloring->ctype == IS_COLORING_GLOBAL) { 4208 ISColoringValue *allcolors,*colors; 4209 ISColoring ocoloring; 4210 4211 /* set coloring for diagonal portion */ 4212 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4213 4214 /* set coloring for off-diagonal portion */ 4215 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4216 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4217 for (i=0; i<a->B->cmap->n; i++) { 4218 colors[i] = allcolors[a->garray[i]]; 4219 } 4220 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4221 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4222 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4223 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4224 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4225 ISColoringValue *colors; 4226 PetscInt *larray; 4227 ISColoring ocoloring; 4228 4229 /* set coloring for diagonal portion */ 4230 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4231 for (i=0; i<a->A->cmap->n; i++) { 4232 larray[i] = i + A->cmap->rstart; 4233 } 4234 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4235 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4236 for (i=0; i<a->A->cmap->n; i++) { 4237 colors[i] = coloring->colors[larray[i]]; 4238 } 4239 ierr = PetscFree(larray);CHKERRQ(ierr); 4240 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4241 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4242 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4243 4244 /* set coloring for off-diagonal portion */ 4245 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4246 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4247 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4248 for (i=0; i<a->B->cmap->n; i++) { 4249 colors[i] = coloring->colors[larray[i]]; 4250 } 4251 ierr = PetscFree(larray);CHKERRQ(ierr); 4252 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4253 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4254 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4255 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4256 4257 PetscFunctionReturn(0); 4258 } 4259 4260 #if defined(PETSC_HAVE_ADIC) 4261 #undef __FUNCT__ 4262 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4263 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4264 { 4265 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4266 PetscErrorCode ierr; 4267 4268 PetscFunctionBegin; 4269 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4270 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4271 PetscFunctionReturn(0); 4272 } 4273 #endif 4274 4275 #undef __FUNCT__ 4276 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4277 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4278 { 4279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4280 PetscErrorCode ierr; 4281 4282 PetscFunctionBegin; 4283 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4284 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4285 PetscFunctionReturn(0); 4286 } 4287 4288 #undef __FUNCT__ 4289 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4290 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4291 { 4292 PetscErrorCode ierr; 4293 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4294 PetscInt *indx; 4295 4296 PetscFunctionBegin; 4297 /* This routine will ONLY return MPIAIJ type matrix */ 4298 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4299 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4300 if (n == PETSC_DECIDE){ 4301 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4302 } 4303 /* Check sum(n) = N */ 4304 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4305 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4306 4307 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4308 rstart -= m; 4309 4310 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4311 for (i=0;i<m;i++) { 4312 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4313 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4314 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4315 } 4316 4317 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4318 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4319 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4320 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4321 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4322 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4323 PetscFunctionReturn(0); 4324 } 4325 4326 #undef __FUNCT__ 4327 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4328 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4329 { 4330 PetscErrorCode ierr; 4331 PetscInt m,N,i,rstart,nnz,Ii; 4332 PetscInt *indx; 4333 PetscScalar *values; 4334 4335 PetscFunctionBegin; 4336 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4337 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4338 for (i=0;i<m;i++) { 4339 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4340 Ii = i + rstart; 4341 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4342 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4343 } 4344 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4345 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4346 PetscFunctionReturn(0); 4347 } 4348 4349 #undef __FUNCT__ 4350 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4351 /*@ 4352 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4353 matrices from each processor 4354 4355 Collective on MPI_Comm 4356 4357 Input Parameters: 4358 + comm - the communicators the parallel matrix will live on 4359 . inmat - the input sequential matrices 4360 . n - number of local columns (or PETSC_DECIDE) 4361 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4362 4363 Output Parameter: 4364 . outmat - the parallel matrix generated 4365 4366 Level: advanced 4367 4368 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4369 4370 @*/ 4371 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4372 { 4373 PetscErrorCode ierr; 4374 4375 PetscFunctionBegin; 4376 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4377 if (scall == MAT_INITIAL_MATRIX){ 4378 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4379 } 4380 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4381 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4382 PetscFunctionReturn(0); 4383 } 4384 4385 #undef __FUNCT__ 4386 #define __FUNCT__ "MatFileSplit" 4387 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4388 { 4389 PetscErrorCode ierr; 4390 PetscMPIInt rank; 4391 PetscInt m,N,i,rstart,nnz; 4392 size_t len; 4393 const PetscInt *indx; 4394 PetscViewer out; 4395 char *name; 4396 Mat B; 4397 const PetscScalar *values; 4398 4399 PetscFunctionBegin; 4400 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4401 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4402 /* Should this be the type of the diagonal block of A? */ 4403 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4404 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4405 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4406 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4407 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4408 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4409 for (i=0;i<m;i++) { 4410 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4411 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4412 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4413 } 4414 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4415 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4416 4417 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4418 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4419 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4420 sprintf(name,"%s.%d",outfile,rank); 4421 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4422 ierr = PetscFree(name); 4423 ierr = MatView(B,out);CHKERRQ(ierr); 4424 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4425 ierr = MatDestroy(&B);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4430 #undef __FUNCT__ 4431 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4432 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4433 { 4434 PetscErrorCode ierr; 4435 Mat_Merge_SeqsToMPI *merge; 4436 PetscContainer container; 4437 4438 PetscFunctionBegin; 4439 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4440 if (container) { 4441 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4442 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4443 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4444 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4445 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4446 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4447 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4448 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4449 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4450 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4451 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4452 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4453 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4454 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4455 ierr = PetscFree(merge);CHKERRQ(ierr); 4456 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4457 } 4458 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4459 PetscFunctionReturn(0); 4460 } 4461 4462 #include <../src/mat/utils/freespace.h> 4463 #include <petscbt.h> 4464 4465 #undef __FUNCT__ 4466 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4467 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4468 { 4469 PetscErrorCode ierr; 4470 MPI_Comm comm=((PetscObject)mpimat)->comm; 4471 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4472 PetscMPIInt size,rank,taga,*len_s; 4473 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4474 PetscInt proc,m; 4475 PetscInt **buf_ri,**buf_rj; 4476 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4477 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4478 MPI_Request *s_waits,*r_waits; 4479 MPI_Status *status; 4480 MatScalar *aa=a->a; 4481 MatScalar **abuf_r,*ba_i; 4482 Mat_Merge_SeqsToMPI *merge; 4483 PetscContainer container; 4484 4485 PetscFunctionBegin; 4486 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4487 4488 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4489 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4490 4491 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4492 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4493 4494 bi = merge->bi; 4495 bj = merge->bj; 4496 buf_ri = merge->buf_ri; 4497 buf_rj = merge->buf_rj; 4498 4499 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4500 owners = merge->rowmap->range; 4501 len_s = merge->len_s; 4502 4503 /* send and recv matrix values */ 4504 /*-----------------------------*/ 4505 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4506 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4507 4508 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4509 for (proc=0,k=0; proc<size; proc++){ 4510 if (!len_s[proc]) continue; 4511 i = owners[proc]; 4512 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4513 k++; 4514 } 4515 4516 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4517 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4518 ierr = PetscFree(status);CHKERRQ(ierr); 4519 4520 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4521 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4522 4523 /* insert mat values of mpimat */ 4524 /*----------------------------*/ 4525 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4526 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4527 4528 for (k=0; k<merge->nrecv; k++){ 4529 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4530 nrows = *(buf_ri_k[k]); 4531 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4532 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4533 } 4534 4535 /* set values of ba */ 4536 m = merge->rowmap->n; 4537 for (i=0; i<m; i++) { 4538 arow = owners[rank] + i; 4539 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4540 bnzi = bi[i+1] - bi[i]; 4541 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4542 4543 /* add local non-zero vals of this proc's seqmat into ba */ 4544 anzi = ai[arow+1] - ai[arow]; 4545 aj = a->j + ai[arow]; 4546 aa = a->a + ai[arow]; 4547 nextaj = 0; 4548 for (j=0; nextaj<anzi; j++){ 4549 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4550 ba_i[j] += aa[nextaj++]; 4551 } 4552 } 4553 4554 /* add received vals into ba */ 4555 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4556 /* i-th row */ 4557 if (i == *nextrow[k]) { 4558 anzi = *(nextai[k]+1) - *nextai[k]; 4559 aj = buf_rj[k] + *(nextai[k]); 4560 aa = abuf_r[k] + *(nextai[k]); 4561 nextaj = 0; 4562 for (j=0; nextaj<anzi; j++){ 4563 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4564 ba_i[j] += aa[nextaj++]; 4565 } 4566 } 4567 nextrow[k]++; nextai[k]++; 4568 } 4569 } 4570 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4571 } 4572 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4573 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4574 4575 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4576 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4577 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4578 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4579 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4580 PetscFunctionReturn(0); 4581 } 4582 4583 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4584 4585 #undef __FUNCT__ 4586 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4587 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4588 { 4589 PetscErrorCode ierr; 4590 Mat B_mpi; 4591 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4592 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4593 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4594 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4595 PetscInt len,proc,*dnz,*onz,bs,cbs; 4596 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4597 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4598 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4599 MPI_Status *status; 4600 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4601 PetscBT lnkbt; 4602 Mat_Merge_SeqsToMPI *merge; 4603 PetscContainer container; 4604 4605 PetscFunctionBegin; 4606 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4607 4608 /* make sure it is a PETSc comm */ 4609 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4610 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4611 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4612 4613 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4614 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4615 4616 /* determine row ownership */ 4617 /*---------------------------------------------------------*/ 4618 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4619 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4620 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4621 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4622 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4623 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4624 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4625 4626 m = merge->rowmap->n; 4627 M = merge->rowmap->N; 4628 owners = merge->rowmap->range; 4629 4630 /* determine the number of messages to send, their lengths */ 4631 /*---------------------------------------------------------*/ 4632 len_s = merge->len_s; 4633 4634 len = 0; /* length of buf_si[] */ 4635 merge->nsend = 0; 4636 for (proc=0; proc<size; proc++){ 4637 len_si[proc] = 0; 4638 if (proc == rank){ 4639 len_s[proc] = 0; 4640 } else { 4641 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4642 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4643 } 4644 if (len_s[proc]) { 4645 merge->nsend++; 4646 nrows = 0; 4647 for (i=owners[proc]; i<owners[proc+1]; i++){ 4648 if (ai[i+1] > ai[i]) nrows++; 4649 } 4650 len_si[proc] = 2*(nrows+1); 4651 len += len_si[proc]; 4652 } 4653 } 4654 4655 /* determine the number and length of messages to receive for ij-structure */ 4656 /*-------------------------------------------------------------------------*/ 4657 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4658 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4659 4660 /* post the Irecv of j-structure */ 4661 /*-------------------------------*/ 4662 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4663 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4664 4665 /* post the Isend of j-structure */ 4666 /*--------------------------------*/ 4667 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4668 4669 for (proc=0, k=0; proc<size; proc++){ 4670 if (!len_s[proc]) continue; 4671 i = owners[proc]; 4672 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4673 k++; 4674 } 4675 4676 /* receives and sends of j-structure are complete */ 4677 /*------------------------------------------------*/ 4678 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4679 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4680 4681 /* send and recv i-structure */ 4682 /*---------------------------*/ 4683 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4684 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4685 4686 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4687 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4688 for (proc=0,k=0; proc<size; proc++){ 4689 if (!len_s[proc]) continue; 4690 /* form outgoing message for i-structure: 4691 buf_si[0]: nrows to be sent 4692 [1:nrows]: row index (global) 4693 [nrows+1:2*nrows+1]: i-structure index 4694 */ 4695 /*-------------------------------------------*/ 4696 nrows = len_si[proc]/2 - 1; 4697 buf_si_i = buf_si + nrows+1; 4698 buf_si[0] = nrows; 4699 buf_si_i[0] = 0; 4700 nrows = 0; 4701 for (i=owners[proc]; i<owners[proc+1]; i++){ 4702 anzi = ai[i+1] - ai[i]; 4703 if (anzi) { 4704 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4705 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4706 nrows++; 4707 } 4708 } 4709 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4710 k++; 4711 buf_si += len_si[proc]; 4712 } 4713 4714 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4715 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4716 4717 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4718 for (i=0; i<merge->nrecv; i++){ 4719 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4720 } 4721 4722 ierr = PetscFree(len_si);CHKERRQ(ierr); 4723 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4724 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4725 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4726 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4727 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4728 ierr = PetscFree(status);CHKERRQ(ierr); 4729 4730 /* compute a local seq matrix in each processor */ 4731 /*----------------------------------------------*/ 4732 /* allocate bi array and free space for accumulating nonzero column info */ 4733 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4734 bi[0] = 0; 4735 4736 /* create and initialize a linked list */ 4737 nlnk = N+1; 4738 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4739 4740 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4741 len = 0; 4742 len = ai[owners[rank+1]] - ai[owners[rank]]; 4743 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4744 current_space = free_space; 4745 4746 /* determine symbolic info for each local row */ 4747 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4748 4749 for (k=0; k<merge->nrecv; k++){ 4750 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4751 nrows = *buf_ri_k[k]; 4752 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4753 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4754 } 4755 4756 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4757 len = 0; 4758 for (i=0;i<m;i++) { 4759 bnzi = 0; 4760 /* add local non-zero cols of this proc's seqmat into lnk */ 4761 arow = owners[rank] + i; 4762 anzi = ai[arow+1] - ai[arow]; 4763 aj = a->j + ai[arow]; 4764 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4765 bnzi += nlnk; 4766 /* add received col data into lnk */ 4767 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4768 if (i == *nextrow[k]) { /* i-th row */ 4769 anzi = *(nextai[k]+1) - *nextai[k]; 4770 aj = buf_rj[k] + *nextai[k]; 4771 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4772 bnzi += nlnk; 4773 nextrow[k]++; nextai[k]++; 4774 } 4775 } 4776 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4777 4778 /* if free space is not available, make more free space */ 4779 if (current_space->local_remaining<bnzi) { 4780 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4781 nspacedouble++; 4782 } 4783 /* copy data into free space, then initialize lnk */ 4784 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4785 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4786 4787 current_space->array += bnzi; 4788 current_space->local_used += bnzi; 4789 current_space->local_remaining -= bnzi; 4790 4791 bi[i+1] = bi[i] + bnzi; 4792 } 4793 4794 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4795 4796 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4797 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4798 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4799 4800 /* create symbolic parallel matrix B_mpi */ 4801 /*---------------------------------------*/ 4802 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4803 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4804 if (n==PETSC_DECIDE) { 4805 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4806 } else { 4807 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4808 } 4809 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4810 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4811 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4812 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4813 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4814 4815 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4816 B_mpi->assembled = PETSC_FALSE; 4817 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4818 merge->bi = bi; 4819 merge->bj = bj; 4820 merge->buf_ri = buf_ri; 4821 merge->buf_rj = buf_rj; 4822 merge->coi = PETSC_NULL; 4823 merge->coj = PETSC_NULL; 4824 merge->owners_co = PETSC_NULL; 4825 4826 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4827 4828 /* attach the supporting struct to B_mpi for reuse */ 4829 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4830 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4831 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4832 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4833 *mpimat = B_mpi; 4834 4835 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4836 PetscFunctionReturn(0); 4837 } 4838 4839 #undef __FUNCT__ 4840 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4841 /*@C 4842 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4843 matrices from each processor 4844 4845 Collective on MPI_Comm 4846 4847 Input Parameters: 4848 + comm - the communicators the parallel matrix will live on 4849 . seqmat - the input sequential matrices 4850 . m - number of local rows (or PETSC_DECIDE) 4851 . n - number of local columns (or PETSC_DECIDE) 4852 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4853 4854 Output Parameter: 4855 . mpimat - the parallel matrix generated 4856 4857 Level: advanced 4858 4859 Notes: 4860 The dimensions of the sequential matrix in each processor MUST be the same. 4861 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4862 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4863 @*/ 4864 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4865 { 4866 PetscErrorCode ierr; 4867 PetscMPIInt size; 4868 4869 PetscFunctionBegin; 4870 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4871 if (size == 1){ 4872 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4873 if (scall == MAT_INITIAL_MATRIX){ 4874 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4875 } else { 4876 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4877 } 4878 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4879 PetscFunctionReturn(0); 4880 } 4881 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4882 if (scall == MAT_INITIAL_MATRIX){ 4883 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4884 } 4885 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4886 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4892 /*@ 4893 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4894 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4895 with MatGetSize() 4896 4897 Not Collective 4898 4899 Input Parameters: 4900 + A - the matrix 4901 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4902 4903 Output Parameter: 4904 . A_loc - the local sequential matrix generated 4905 4906 Level: developer 4907 4908 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4909 4910 @*/ 4911 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4912 { 4913 PetscErrorCode ierr; 4914 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4915 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4916 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4917 MatScalar *aa=a->a,*ba=b->a,*cam; 4918 PetscScalar *ca; 4919 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4920 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4921 PetscBool match; 4922 4923 PetscFunctionBegin; 4924 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4925 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4926 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4927 if (scall == MAT_INITIAL_MATRIX){ 4928 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4929 ci[0] = 0; 4930 for (i=0; i<am; i++){ 4931 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4932 } 4933 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4934 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4935 k = 0; 4936 for (i=0; i<am; i++) { 4937 ncols_o = bi[i+1] - bi[i]; 4938 ncols_d = ai[i+1] - ai[i]; 4939 /* off-diagonal portion of A */ 4940 for (jo=0; jo<ncols_o; jo++) { 4941 col = cmap[*bj]; 4942 if (col >= cstart) break; 4943 cj[k] = col; bj++; 4944 ca[k++] = *ba++; 4945 } 4946 /* diagonal portion of A */ 4947 for (j=0; j<ncols_d; j++) { 4948 cj[k] = cstart + *aj++; 4949 ca[k++] = *aa++; 4950 } 4951 /* off-diagonal portion of A */ 4952 for (j=jo; j<ncols_o; j++) { 4953 cj[k] = cmap[*bj++]; 4954 ca[k++] = *ba++; 4955 } 4956 } 4957 /* put together the new matrix */ 4958 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4959 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4960 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4961 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4962 mat->free_a = PETSC_TRUE; 4963 mat->free_ij = PETSC_TRUE; 4964 mat->nonew = 0; 4965 } else if (scall == MAT_REUSE_MATRIX){ 4966 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4967 ci = mat->i; cj = mat->j; cam = mat->a; 4968 for (i=0; i<am; i++) { 4969 /* off-diagonal portion of A */ 4970 ncols_o = bi[i+1] - bi[i]; 4971 for (jo=0; jo<ncols_o; jo++) { 4972 col = cmap[*bj]; 4973 if (col >= cstart) break; 4974 *cam++ = *ba++; bj++; 4975 } 4976 /* diagonal portion of A */ 4977 ncols_d = ai[i+1] - ai[i]; 4978 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4979 /* off-diagonal portion of A */ 4980 for (j=jo; j<ncols_o; j++) { 4981 *cam++ = *ba++; bj++; 4982 } 4983 } 4984 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4985 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4986 PetscFunctionReturn(0); 4987 } 4988 4989 #undef __FUNCT__ 4990 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4991 /*@C 4992 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4993 4994 Not Collective 4995 4996 Input Parameters: 4997 + A - the matrix 4998 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4999 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5000 5001 Output Parameter: 5002 . A_loc - the local sequential matrix generated 5003 5004 Level: developer 5005 5006 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5007 5008 @*/ 5009 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5010 { 5011 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5012 PetscErrorCode ierr; 5013 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5014 IS isrowa,iscola; 5015 Mat *aloc; 5016 PetscBool match; 5017 5018 PetscFunctionBegin; 5019 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5020 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5021 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5022 if (!row){ 5023 start = A->rmap->rstart; end = A->rmap->rend; 5024 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5025 } else { 5026 isrowa = *row; 5027 } 5028 if (!col){ 5029 start = A->cmap->rstart; 5030 cmap = a->garray; 5031 nzA = a->A->cmap->n; 5032 nzB = a->B->cmap->n; 5033 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5034 ncols = 0; 5035 for (i=0; i<nzB; i++) { 5036 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5037 else break; 5038 } 5039 imark = i; 5040 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5041 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5042 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5043 } else { 5044 iscola = *col; 5045 } 5046 if (scall != MAT_INITIAL_MATRIX){ 5047 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5048 aloc[0] = *A_loc; 5049 } 5050 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5051 *A_loc = aloc[0]; 5052 ierr = PetscFree(aloc);CHKERRQ(ierr); 5053 if (!row){ 5054 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5055 } 5056 if (!col){ 5057 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5058 } 5059 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5060 PetscFunctionReturn(0); 5061 } 5062 5063 #undef __FUNCT__ 5064 #define __FUNCT__ "MatGetBrowsOfAcols" 5065 /*@C 5066 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5067 5068 Collective on Mat 5069 5070 Input Parameters: 5071 + A,B - the matrices in mpiaij format 5072 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5073 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5074 5075 Output Parameter: 5076 + rowb, colb - index sets of rows and columns of B to extract 5077 - B_seq - the sequential matrix generated 5078 5079 Level: developer 5080 5081 @*/ 5082 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5083 { 5084 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5085 PetscErrorCode ierr; 5086 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5087 IS isrowb,iscolb; 5088 Mat *bseq=PETSC_NULL; 5089 5090 PetscFunctionBegin; 5091 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5092 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5093 } 5094 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5095 5096 if (scall == MAT_INITIAL_MATRIX){ 5097 start = A->cmap->rstart; 5098 cmap = a->garray; 5099 nzA = a->A->cmap->n; 5100 nzB = a->B->cmap->n; 5101 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5102 ncols = 0; 5103 for (i=0; i<nzB; i++) { /* row < local row index */ 5104 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5105 else break; 5106 } 5107 imark = i; 5108 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5109 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5110 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5111 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5112 } else { 5113 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5114 isrowb = *rowb; iscolb = *colb; 5115 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5116 bseq[0] = *B_seq; 5117 } 5118 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5119 *B_seq = bseq[0]; 5120 ierr = PetscFree(bseq);CHKERRQ(ierr); 5121 if (!rowb){ 5122 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5123 } else { 5124 *rowb = isrowb; 5125 } 5126 if (!colb){ 5127 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5128 } else { 5129 *colb = iscolb; 5130 } 5131 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5132 PetscFunctionReturn(0); 5133 } 5134 5135 #undef __FUNCT__ 5136 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5137 /* 5138 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5139 of the OFF-DIAGONAL portion of local A 5140 5141 Collective on Mat 5142 5143 Input Parameters: 5144 + A,B - the matrices in mpiaij format 5145 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5146 5147 Output Parameter: 5148 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5149 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5150 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5151 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5152 5153 Level: developer 5154 5155 */ 5156 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5157 { 5158 VecScatter_MPI_General *gen_to,*gen_from; 5159 PetscErrorCode ierr; 5160 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5161 Mat_SeqAIJ *b_oth; 5162 VecScatter ctx=a->Mvctx; 5163 MPI_Comm comm=((PetscObject)ctx)->comm; 5164 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5165 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5166 PetscScalar *rvalues,*svalues; 5167 MatScalar *b_otha,*bufa,*bufA; 5168 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5169 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5170 MPI_Status *sstatus,rstatus; 5171 PetscMPIInt jj; 5172 PetscInt *cols,sbs,rbs; 5173 PetscScalar *vals; 5174 5175 PetscFunctionBegin; 5176 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5177 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5178 } 5179 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5180 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5181 5182 gen_to = (VecScatter_MPI_General*)ctx->todata; 5183 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5184 rvalues = gen_from->values; /* holds the length of receiving row */ 5185 svalues = gen_to->values; /* holds the length of sending row */ 5186 nrecvs = gen_from->n; 5187 nsends = gen_to->n; 5188 5189 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5190 srow = gen_to->indices; /* local row index to be sent */ 5191 sstarts = gen_to->starts; 5192 sprocs = gen_to->procs; 5193 sstatus = gen_to->sstatus; 5194 sbs = gen_to->bs; 5195 rstarts = gen_from->starts; 5196 rprocs = gen_from->procs; 5197 rbs = gen_from->bs; 5198 5199 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5200 if (scall == MAT_INITIAL_MATRIX){ 5201 /* i-array */ 5202 /*---------*/ 5203 /* post receives */ 5204 for (i=0; i<nrecvs; i++){ 5205 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5206 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5207 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5208 } 5209 5210 /* pack the outgoing message */ 5211 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5212 sstartsj[0] = 0; rstartsj[0] = 0; 5213 len = 0; /* total length of j or a array to be sent */ 5214 k = 0; 5215 for (i=0; i<nsends; i++){ 5216 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5217 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5218 for (j=0; j<nrows; j++) { 5219 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5220 for (l=0; l<sbs; l++){ 5221 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5222 rowlen[j*sbs+l] = ncols; 5223 len += ncols; 5224 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5225 } 5226 k++; 5227 } 5228 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5229 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5230 } 5231 /* recvs and sends of i-array are completed */ 5232 i = nrecvs; 5233 while (i--) { 5234 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5235 } 5236 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5237 5238 /* allocate buffers for sending j and a arrays */ 5239 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5240 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5241 5242 /* create i-array of B_oth */ 5243 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5244 b_othi[0] = 0; 5245 len = 0; /* total length of j or a array to be received */ 5246 k = 0; 5247 for (i=0; i<nrecvs; i++){ 5248 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5249 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5250 for (j=0; j<nrows; j++) { 5251 b_othi[k+1] = b_othi[k] + rowlen[j]; 5252 len += rowlen[j]; k++; 5253 } 5254 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5255 } 5256 5257 /* allocate space for j and a arrrays of B_oth */ 5258 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5259 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5260 5261 /* j-array */ 5262 /*---------*/ 5263 /* post receives of j-array */ 5264 for (i=0; i<nrecvs; i++){ 5265 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5266 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5267 } 5268 5269 /* pack the outgoing message j-array */ 5270 k = 0; 5271 for (i=0; i<nsends; i++){ 5272 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5273 bufJ = bufj+sstartsj[i]; 5274 for (j=0; j<nrows; j++) { 5275 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5276 for (ll=0; ll<sbs; ll++){ 5277 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5278 for (l=0; l<ncols; l++){ 5279 *bufJ++ = cols[l]; 5280 } 5281 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5282 } 5283 } 5284 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5285 } 5286 5287 /* recvs and sends of j-array are completed */ 5288 i = nrecvs; 5289 while (i--) { 5290 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5291 } 5292 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5293 } else if (scall == MAT_REUSE_MATRIX){ 5294 sstartsj = *startsj_s; 5295 rstartsj = *startsj_r; 5296 bufa = *bufa_ptr; 5297 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5298 b_otha = b_oth->a; 5299 } else { 5300 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5301 } 5302 5303 /* a-array */ 5304 /*---------*/ 5305 /* post receives of a-array */ 5306 for (i=0; i<nrecvs; i++){ 5307 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5308 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5309 } 5310 5311 /* pack the outgoing message a-array */ 5312 k = 0; 5313 for (i=0; i<nsends; i++){ 5314 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5315 bufA = bufa+sstartsj[i]; 5316 for (j=0; j<nrows; j++) { 5317 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5318 for (ll=0; ll<sbs; ll++){ 5319 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5320 for (l=0; l<ncols; l++){ 5321 *bufA++ = vals[l]; 5322 } 5323 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5324 } 5325 } 5326 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5327 } 5328 /* recvs and sends of a-array are completed */ 5329 i = nrecvs; 5330 while (i--) { 5331 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5332 } 5333 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5334 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5335 5336 if (scall == MAT_INITIAL_MATRIX){ 5337 /* put together the new matrix */ 5338 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5339 5340 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5341 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5342 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5343 b_oth->free_a = PETSC_TRUE; 5344 b_oth->free_ij = PETSC_TRUE; 5345 b_oth->nonew = 0; 5346 5347 ierr = PetscFree(bufj);CHKERRQ(ierr); 5348 if (!startsj_s || !bufa_ptr){ 5349 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5350 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5351 } else { 5352 *startsj_s = sstartsj; 5353 *startsj_r = rstartsj; 5354 *bufa_ptr = bufa; 5355 } 5356 } 5357 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5358 PetscFunctionReturn(0); 5359 } 5360 5361 #undef __FUNCT__ 5362 #define __FUNCT__ "MatGetCommunicationStructs" 5363 /*@C 5364 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5365 5366 Not Collective 5367 5368 Input Parameters: 5369 . A - The matrix in mpiaij format 5370 5371 Output Parameter: 5372 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5373 . colmap - A map from global column index to local index into lvec 5374 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5375 5376 Level: developer 5377 5378 @*/ 5379 #if defined (PETSC_USE_CTABLE) 5380 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5381 #else 5382 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5383 #endif 5384 { 5385 Mat_MPIAIJ *a; 5386 5387 PetscFunctionBegin; 5388 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5389 PetscValidPointer(lvec, 2); 5390 PetscValidPointer(colmap, 3); 5391 PetscValidPointer(multScatter, 4); 5392 a = (Mat_MPIAIJ *) A->data; 5393 if (lvec) *lvec = a->lvec; 5394 if (colmap) *colmap = a->colmap; 5395 if (multScatter) *multScatter = a->Mvctx; 5396 PetscFunctionReturn(0); 5397 } 5398 5399 EXTERN_C_BEGIN 5400 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5401 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5402 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5403 EXTERN_C_END 5404 5405 #undef __FUNCT__ 5406 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5407 /* 5408 Computes (B'*A')' since computing B*A directly is untenable 5409 5410 n p p 5411 ( ) ( ) ( ) 5412 m ( A ) * n ( B ) = m ( C ) 5413 ( ) ( ) ( ) 5414 5415 */ 5416 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5417 { 5418 PetscErrorCode ierr; 5419 Mat At,Bt,Ct; 5420 5421 PetscFunctionBegin; 5422 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5423 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5424 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5425 ierr = MatDestroy(&At);CHKERRQ(ierr); 5426 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5427 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5428 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5429 PetscFunctionReturn(0); 5430 } 5431 5432 #undef __FUNCT__ 5433 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5434 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5435 { 5436 PetscErrorCode ierr; 5437 PetscInt m=A->rmap->n,n=B->cmap->n; 5438 Mat Cmat; 5439 5440 PetscFunctionBegin; 5441 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5442 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5443 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5444 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5445 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5446 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5447 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5448 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5449 *C = Cmat; 5450 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5451 PetscFunctionReturn(0); 5452 } 5453 5454 /* ----------------------------------------------------------------*/ 5455 #undef __FUNCT__ 5456 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5457 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5458 { 5459 PetscErrorCode ierr; 5460 5461 PetscFunctionBegin; 5462 if (scall == MAT_INITIAL_MATRIX){ 5463 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5464 } 5465 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5466 PetscFunctionReturn(0); 5467 } 5468 5469 EXTERN_C_BEGIN 5470 #if defined(PETSC_HAVE_MUMPS) 5471 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5472 #endif 5473 #if defined(PETSC_HAVE_PASTIX) 5474 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5475 #endif 5476 #if defined(PETSC_HAVE_SUPERLU_DIST) 5477 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5478 #endif 5479 #if defined(PETSC_HAVE_SPOOLES) 5480 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5481 #endif 5482 EXTERN_C_END 5483 5484 /*MC 5485 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5486 5487 Options Database Keys: 5488 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5489 5490 Level: beginner 5491 5492 .seealso: MatCreateAIJ() 5493 M*/ 5494 5495 EXTERN_C_BEGIN 5496 #undef __FUNCT__ 5497 #define __FUNCT__ "MatCreate_MPIAIJ" 5498 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5499 { 5500 Mat_MPIAIJ *b; 5501 PetscErrorCode ierr; 5502 PetscMPIInt size; 5503 5504 PetscFunctionBegin; 5505 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5506 5507 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5508 B->data = (void*)b; 5509 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5510 B->assembled = PETSC_FALSE; 5511 5512 B->insertmode = NOT_SET_VALUES; 5513 b->size = size; 5514 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5515 5516 /* build cache for off array entries formed */ 5517 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5518 b->donotstash = PETSC_FALSE; 5519 b->colmap = 0; 5520 b->garray = 0; 5521 b->roworiented = PETSC_TRUE; 5522 5523 /* stuff used for matrix vector multiply */ 5524 b->lvec = PETSC_NULL; 5525 b->Mvctx = PETSC_NULL; 5526 5527 /* stuff for MatGetRow() */ 5528 b->rowindices = 0; 5529 b->rowvalues = 0; 5530 b->getrowactive = PETSC_FALSE; 5531 5532 #if defined(PETSC_HAVE_SPOOLES) 5533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5534 "MatGetFactor_mpiaij_spooles", 5535 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5536 #endif 5537 #if defined(PETSC_HAVE_MUMPS) 5538 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5539 "MatGetFactor_aij_mumps", 5540 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5541 #endif 5542 #if defined(PETSC_HAVE_PASTIX) 5543 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5544 "MatGetFactor_mpiaij_pastix", 5545 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5546 #endif 5547 #if defined(PETSC_HAVE_SUPERLU_DIST) 5548 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5549 "MatGetFactor_mpiaij_superlu_dist", 5550 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5551 #endif 5552 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5553 "MatStoreValues_MPIAIJ", 5554 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5556 "MatRetrieveValues_MPIAIJ", 5557 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5558 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5559 "MatGetDiagonalBlock_MPIAIJ", 5560 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5562 "MatIsTranspose_MPIAIJ", 5563 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5564 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5565 "MatMPIAIJSetPreallocation_MPIAIJ", 5566 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5567 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5568 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5569 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5570 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5571 "MatDiagonalScaleLocal_MPIAIJ", 5572 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5573 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5574 "MatConvert_MPIAIJ_MPIAIJPERM", 5575 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5576 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5577 "MatConvert_MPIAIJ_MPIAIJCRL", 5578 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5579 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5580 "MatConvert_MPIAIJ_MPISBAIJ", 5581 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5582 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5583 "MatMatMult_MPIDense_MPIAIJ", 5584 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5586 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5587 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5589 "MatMatMultNumeric_MPIDense_MPIAIJ", 5590 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5591 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5592 PetscFunctionReturn(0); 5593 } 5594 EXTERN_C_END 5595 5596 #undef __FUNCT__ 5597 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5598 /*@ 5599 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5600 and "off-diagonal" part of the matrix in CSR format. 5601 5602 Collective on MPI_Comm 5603 5604 Input Parameters: 5605 + comm - MPI communicator 5606 . m - number of local rows (Cannot be PETSC_DECIDE) 5607 . n - This value should be the same as the local size used in creating the 5608 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5609 calculated if N is given) For square matrices n is almost always m. 5610 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5611 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5612 . i - row indices for "diagonal" portion of matrix 5613 . j - column indices 5614 . a - matrix values 5615 . oi - row indices for "off-diagonal" portion of matrix 5616 . oj - column indices 5617 - oa - matrix values 5618 5619 Output Parameter: 5620 . mat - the matrix 5621 5622 Level: advanced 5623 5624 Notes: 5625 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5626 must free the arrays once the matrix has been destroyed and not before. 5627 5628 The i and j indices are 0 based 5629 5630 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5631 5632 This sets local rows and cannot be used to set off-processor values. 5633 5634 You cannot later use MatSetValues() to change values in this matrix. 5635 5636 .keywords: matrix, aij, compressed row, sparse, parallel 5637 5638 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5639 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5640 @*/ 5641 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5642 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5643 { 5644 PetscErrorCode ierr; 5645 Mat_MPIAIJ *maij; 5646 5647 PetscFunctionBegin; 5648 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5649 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5650 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5651 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5652 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5653 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5654 maij = (Mat_MPIAIJ*) (*mat)->data; 5655 maij->donotstash = PETSC_TRUE; 5656 (*mat)->preallocated = PETSC_TRUE; 5657 5658 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5659 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5660 5661 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5662 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5663 5664 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5665 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5666 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5667 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5668 5669 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5670 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5671 PetscFunctionReturn(0); 5672 } 5673 5674 /* 5675 Special version for direct calls from Fortran 5676 */ 5677 #include <petsc-private/fortranimpl.h> 5678 5679 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5680 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5681 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5682 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5683 #endif 5684 5685 /* Change these macros so can be used in void function */ 5686 #undef CHKERRQ 5687 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5688 #undef SETERRQ2 5689 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5690 #undef SETERRQ3 5691 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5692 #undef SETERRQ 5693 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5694 5695 EXTERN_C_BEGIN 5696 #undef __FUNCT__ 5697 #define __FUNCT__ "matsetvaluesmpiaij_" 5698 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5699 { 5700 Mat mat = *mmat; 5701 PetscInt m = *mm, n = *mn; 5702 InsertMode addv = *maddv; 5703 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5704 PetscScalar value; 5705 PetscErrorCode ierr; 5706 5707 MatCheckPreallocated(mat,1); 5708 if (mat->insertmode == NOT_SET_VALUES) { 5709 mat->insertmode = addv; 5710 } 5711 #if defined(PETSC_USE_DEBUG) 5712 else if (mat->insertmode != addv) { 5713 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5714 } 5715 #endif 5716 { 5717 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5718 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5719 PetscBool roworiented = aij->roworiented; 5720 5721 /* Some Variables required in the macro */ 5722 Mat A = aij->A; 5723 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5724 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5725 MatScalar *aa = a->a; 5726 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5727 Mat B = aij->B; 5728 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5729 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5730 MatScalar *ba = b->a; 5731 5732 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5733 PetscInt nonew = a->nonew; 5734 MatScalar *ap1,*ap2; 5735 5736 PetscFunctionBegin; 5737 for (i=0; i<m; i++) { 5738 if (im[i] < 0) continue; 5739 #if defined(PETSC_USE_DEBUG) 5740 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5741 #endif 5742 if (im[i] >= rstart && im[i] < rend) { 5743 row = im[i] - rstart; 5744 lastcol1 = -1; 5745 rp1 = aj + ai[row]; 5746 ap1 = aa + ai[row]; 5747 rmax1 = aimax[row]; 5748 nrow1 = ailen[row]; 5749 low1 = 0; 5750 high1 = nrow1; 5751 lastcol2 = -1; 5752 rp2 = bj + bi[row]; 5753 ap2 = ba + bi[row]; 5754 rmax2 = bimax[row]; 5755 nrow2 = bilen[row]; 5756 low2 = 0; 5757 high2 = nrow2; 5758 5759 for (j=0; j<n; j++) { 5760 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5761 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5762 if (in[j] >= cstart && in[j] < cend){ 5763 col = in[j] - cstart; 5764 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5765 } else if (in[j] < 0) continue; 5766 #if defined(PETSC_USE_DEBUG) 5767 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5768 #endif 5769 else { 5770 if (mat->was_assembled) { 5771 if (!aij->colmap) { 5772 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5773 } 5774 #if defined (PETSC_USE_CTABLE) 5775 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5776 col--; 5777 #else 5778 col = aij->colmap[in[j]] - 1; 5779 #endif 5780 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5781 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5782 col = in[j]; 5783 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5784 B = aij->B; 5785 b = (Mat_SeqAIJ*)B->data; 5786 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5787 rp2 = bj + bi[row]; 5788 ap2 = ba + bi[row]; 5789 rmax2 = bimax[row]; 5790 nrow2 = bilen[row]; 5791 low2 = 0; 5792 high2 = nrow2; 5793 bm = aij->B->rmap->n; 5794 ba = b->a; 5795 } 5796 } else col = in[j]; 5797 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5798 } 5799 } 5800 } else { 5801 if (!aij->donotstash) { 5802 if (roworiented) { 5803 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5804 } else { 5805 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5806 } 5807 } 5808 } 5809 }} 5810 PetscFunctionReturnVoid(); 5811 } 5812 EXTERN_C_END 5813 5814