1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baij.c,v 1.165 1999/02/18 17:13:47 bsmith Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "sys.h" 10 #include "src/mat/impls/baij/seq/baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 14 #define CHUNKSIZE 10 15 16 /* 17 Checks for missing diagonals 18 */ 19 #undef __FUNC__ 20 #define __FUNC__ "MatMissingDiag_SeqBAIJ" 21 int MatMissingDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int *diag = a->diag, *jj = a->j,i; 25 26 PetscFunctionBegin; 27 for ( i=0; i<a->mbs; i++ ) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 37 int MatMarkDiag_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 40 int i,j, *diag, m = a->mbs; 41 42 PetscFunctionBegin; 43 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 44 PLogObjectMemory(A,(m+1)*sizeof(int)); 45 for ( i=0; i<m; i++ ) { 46 diag[i] = a->i[i+1]; 47 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 48 if (a->j[j] == i) { 49 diag[i] = j; 50 break; 51 } 52 } 53 } 54 a->diag = diag; 55 PetscFunctionReturn(0); 56 } 57 58 59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 60 61 #undef __FUNC__ 62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 64 PetscTruth *done) 65 { 66 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 67 int ierr, n = a->mbs,i; 68 69 PetscFunctionBegin; 70 *nn = n; 71 if (!ia) PetscFunctionReturn(0); 72 if (symmetric) { 73 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 74 } else if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 int nz = a->i[n] + 1; 77 for ( i=0; i<nz; i++ ) a->j[i]++; 78 for ( i=0; i<n+1; i++ ) a->i[i]++; 79 *ia = a->i; *ja = a->j; 80 } else { 81 *ia = a->i; *ja = a->j; 82 } 83 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNC__ 88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 90 PetscTruth *done) 91 { 92 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 93 int i,n = a->mbs; 94 95 PetscFunctionBegin; 96 if (!ia) PetscFunctionReturn(0); 97 if (symmetric) { 98 PetscFree(*ia); 99 PetscFree(*ja); 100 } else if (oshift == 1) { 101 int nz = a->i[n]; 102 for ( i=0; i<nz; i++ ) a->j[i]--; 103 for ( i=0; i<n+1; i++ ) a->i[i]--; 104 } 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNC__ 109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 111 { 112 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 113 114 PetscFunctionBegin; 115 *bs = baij->bs; 116 PetscFunctionReturn(0); 117 } 118 119 120 #undef __FUNC__ 121 #define __FUNC__ "MatDestroy_SeqBAIJ" 122 int MatDestroy_SeqBAIJ(Mat A) 123 { 124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 125 int ierr; 126 127 if (--A->refct > 0) PetscFunctionReturn(0); 128 129 if (A->mapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 131 } 132 if (A->bmapping) { 133 ierr = ISLocalToGlobalMappingDestroy(A->bmapping); CHKERRQ(ierr); 134 } 135 if (A->rmap) { 136 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 137 } 138 if (A->cmap) { 139 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 140 } 141 #if defined(USE_PETSC_LOG) 142 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 143 #endif 144 PetscFree(a->a); 145 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 146 if (a->diag) PetscFree(a->diag); 147 if (a->ilen) PetscFree(a->ilen); 148 if (a->imax) PetscFree(a->imax); 149 if (a->solve_work) PetscFree(a->solve_work); 150 if (a->mult_work) PetscFree(a->mult_work); 151 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 152 PetscFree(a); 153 PLogObjectDestroy(A); 154 PetscHeaderDestroy(A); 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNC__ 159 #define __FUNC__ "MatSetOption_SeqBAIJ" 160 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 161 { 162 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 163 164 PetscFunctionBegin; 165 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 166 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 167 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 168 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 169 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 170 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 171 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 172 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 173 else if (op == MAT_ROWS_SORTED || 174 op == MAT_ROWS_UNSORTED || 175 op == MAT_SYMMETRIC || 176 op == MAT_STRUCTURALLY_SYMMETRIC || 177 op == MAT_YES_NEW_DIAGONALS || 178 op == MAT_IGNORE_OFF_PROC_ENTRIES || 179 op == MAT_USE_HASH_TABLE) { 180 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 181 } else if (op == MAT_NO_NEW_DIAGONALS) { 182 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 183 } else { 184 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 185 } 186 PetscFunctionReturn(0); 187 } 188 189 190 #undef __FUNC__ 191 #define __FUNC__ "MatGetSize_SeqBAIJ" 192 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 193 { 194 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 195 196 PetscFunctionBegin; 197 if (m) *m = a->m; 198 if (n) *n = a->n; 199 PetscFunctionReturn(0); 200 } 201 202 #undef __FUNC__ 203 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 204 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 205 { 206 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 207 208 PetscFunctionBegin; 209 *m = 0; *n = a->m; 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNC__ 214 #define __FUNC__ "MatGetRow_SeqBAIJ" 215 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 218 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 219 MatScalar *aa,*aa_i; 220 Scalar *v_i; 221 222 PetscFunctionBegin; 223 bs = a->bs; 224 ai = a->i; 225 aj = a->j; 226 aa = a->a; 227 bs2 = a->bs2; 228 229 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 230 231 bn = row/bs; /* Block number */ 232 bp = row % bs; /* Block Position */ 233 M = ai[bn+1] - ai[bn]; 234 *nz = bs*M; 235 236 if (v) { 237 *v = 0; 238 if (*nz) { 239 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 240 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 241 v_i = *v + i*bs; 242 aa_i = aa + bs2*(ai[bn] + i); 243 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 244 } 245 } 246 } 247 248 if (idx) { 249 *idx = 0; 250 if (*nz) { 251 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 252 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 253 idx_i = *idx + i*bs; 254 itmp = bs*aj[ai[bn] + i]; 255 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 256 } 257 } 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNC__ 263 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 264 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 265 { 266 PetscFunctionBegin; 267 if (idx) {if (*idx) PetscFree(*idx);} 268 if (v) {if (*v) PetscFree(*v);} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNC__ 273 #define __FUNC__ "MatTranspose_SeqBAIJ" 274 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 275 { 276 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 277 Mat C; 278 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 279 int *rows,*cols,bs2=a->bs2; 280 MatScalar *array = a->a; 281 282 PetscFunctionBegin; 283 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 284 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 285 PetscMemzero(col,(1+nbs)*sizeof(int)); 286 287 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 288 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 289 PetscFree(col); 290 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 291 cols = rows + bs; 292 for ( i=0; i<mbs; i++ ) { 293 cols[0] = i*bs; 294 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 295 len = ai[i+1] - ai[i]; 296 for ( j=0; j<len; j++ ) { 297 rows[0] = (*aj++)*bs; 298 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 299 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 300 array += bs2; 301 } 302 } 303 PetscFree(rows); 304 305 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 306 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 307 308 if (B != PETSC_NULL) { 309 *B = C; 310 } else { 311 PetscOps *Abops; 312 MatOps Aops; 313 314 /* This isn't really an in-place transpose */ 315 PetscFree(a->a); 316 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 317 if (a->diag) PetscFree(a->diag); 318 if (a->ilen) PetscFree(a->ilen); 319 if (a->imax) PetscFree(a->imax); 320 if (a->solve_work) PetscFree(a->solve_work); 321 PetscFree(a); 322 323 324 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 325 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 326 327 /* 328 This is horrible, horrible code. We need to keep the 329 A pointers for the bops and ops but copy everything 330 else from C. 331 */ 332 Abops = A->bops; 333 Aops = A->ops; 334 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 335 A->bops = Abops; 336 A->ops = Aops; 337 A->qlist = 0; 338 339 PetscHeaderDestroy(C); 340 } 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ_Binary" 346 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 347 { 348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 349 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 350 Scalar *aa; 351 FILE *file; 352 353 PetscFunctionBegin; 354 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 355 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 356 col_lens[0] = MAT_COOKIE; 357 358 col_lens[1] = a->m; 359 col_lens[2] = a->n; 360 col_lens[3] = a->nz*bs2; 361 362 /* store lengths of each row and write (including header) to file */ 363 count = 0; 364 for ( i=0; i<a->mbs; i++ ) { 365 for ( j=0; j<bs; j++ ) { 366 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 367 } 368 } 369 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr); 370 PetscFree(col_lens); 371 372 /* store column indices (zero start index) */ 373 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 374 count = 0; 375 for ( i=0; i<a->mbs; i++ ) { 376 for ( j=0; j<bs; j++ ) { 377 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 378 for ( l=0; l<bs; l++ ) { 379 jj[count++] = bs*a->j[k] + l; 380 } 381 } 382 } 383 } 384 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr); 385 PetscFree(jj); 386 387 /* store nonzero values */ 388 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 389 count = 0; 390 for ( i=0; i<a->mbs; i++ ) { 391 for ( j=0; j<bs; j++ ) { 392 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 393 for ( l=0; l<bs; l++ ) { 394 aa[count++] = a->a[bs2*k + l*bs + j]; 395 } 396 } 397 } 398 } 399 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr); 400 PetscFree(aa); 401 402 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 403 if (file) { 404 fprintf(file,"-matload_block_size %d\n",a->bs); 405 } 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNC__ 410 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 411 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 412 { 413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 414 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 415 FILE *fd; 416 char *outputname; 417 418 PetscFunctionBegin; 419 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 420 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 421 ierr = ViewerGetFormat(viewer,&format); 422 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 423 ViewerASCIIPrintf(viewer," block size is %d\n",bs); 424 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 425 SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported"); 426 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 427 for ( i=0; i<a->mbs; i++ ) { 428 for ( j=0; j<bs; j++ ) { 429 fprintf(fd,"row %d:",i*bs+j); 430 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 431 for ( l=0; l<bs; l++ ) { 432 #if defined(USE_PETSC_COMPLEX) 433 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 434 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 435 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j])); 436 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 437 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 438 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j])); 439 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 440 fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j])); 441 } 442 #else 443 if (a->a[bs2*k + l*bs + j] != 0.0) { 444 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 445 } 446 #endif 447 } 448 } 449 fprintf(fd,"\n"); 450 } 451 } 452 } else { 453 for ( i=0; i<a->mbs; i++ ) { 454 for ( j=0; j<bs; j++ ) { 455 fprintf(fd,"row %d:",i*bs+j); 456 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 457 for ( l=0; l<bs; l++ ) { 458 #if defined(USE_PETSC_COMPLEX) 459 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 460 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 461 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j])); 462 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 463 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 464 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j])); 465 } else { 466 fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j])); 467 } 468 #else 469 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 470 #endif 471 } 472 } 473 fprintf(fd,"\n"); 474 } 475 } 476 } 477 fflush(fd); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNC__ 482 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 483 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 484 { 485 Mat A = (Mat) Aa; 486 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 487 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 488 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 489 MatScalar *aa; 490 MPI_Comm comm; 491 Viewer viewer; 492 493 PetscFunctionBegin; 494 /* 495 This is nasty. If this is called from an originally parallel matrix 496 then all processes call this, but only the first has the matrix so the 497 rest should return immediately. 498 */ 499 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 500 MPI_Comm_rank(comm,&rank); 501 if (rank) PetscFunctionReturn(0); 502 503 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 504 505 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr); CHKERRQ(ierr); 506 507 /* loop over matrix elements drawing boxes */ 508 color = DRAW_BLUE; 509 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 510 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 511 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 512 x_l = a->j[j]*bs; x_r = x_l + 1.0; 513 aa = a->a + j*bs2; 514 for ( k=0; k<bs; k++ ) { 515 for ( l=0; l<bs; l++ ) { 516 if (PetscReal(*aa++) >= 0.) continue; 517 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 518 } 519 } 520 } 521 } 522 color = DRAW_CYAN; 523 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 524 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 525 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 526 x_l = a->j[j]*bs; x_r = x_l + 1.0; 527 aa = a->a + j*bs2; 528 for ( k=0; k<bs; k++ ) { 529 for ( l=0; l<bs; l++ ) { 530 if (PetscReal(*aa++) != 0.) continue; 531 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 532 } 533 } 534 } 535 } 536 537 color = DRAW_RED; 538 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 539 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 540 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 541 x_l = a->j[j]*bs; x_r = x_l + 1.0; 542 aa = a->a + j*bs2; 543 for ( k=0; k<bs; k++ ) { 544 for ( l=0; l<bs; l++ ) { 545 if (PetscReal(*aa++) <= 0.) continue; 546 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 547 } 548 } 549 } 550 } 551 PetscFunctionReturn(0); 552 } 553 554 #undef __FUNC__ 555 #define __FUNC__ "MatView_SeqBAIJ_Draw" 556 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 557 { 558 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 559 int ierr; 560 double xl,yl,xr,yr,w,h; 561 Draw draw; 562 PetscTruth isnull; 563 564 PetscFunctionBegin; 565 566 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 567 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 568 569 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 570 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 571 xr += w; yr += h; xl = -w; yl = -h; 572 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 573 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A); CHKERRQ(ierr); 574 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNC__ 579 #define __FUNC__ "MatView_SeqBAIJ" 580 int MatView_SeqBAIJ(Mat A,Viewer viewer) 581 { 582 ViewerType vtype; 583 int ierr; 584 585 PetscFunctionBegin; 586 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 587 if (PetscTypeCompare(vtype,SOCKET_VIEWER)) { 588 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 589 } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){ 590 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 591 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 592 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 593 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 594 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 595 } else { 596 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 597 } 598 PetscFunctionReturn(0); 599 } 600 601 602 #undef __FUNC__ 603 #define __FUNC__ "MatGetValues_SeqBAIJ" 604 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 605 { 606 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 607 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 608 int *ai = a->i, *ailen = a->ilen; 609 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 610 MatScalar *ap, *aa = a->a, zero = 0.0; 611 612 PetscFunctionBegin; 613 for ( k=0; k<m; k++ ) { /* loop over rows */ 614 row = im[k]; brow = row/bs; 615 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 616 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 617 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 618 nrow = ailen[brow]; 619 for ( l=0; l<n; l++ ) { /* loop over columns */ 620 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 621 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 622 col = in[l] ; 623 bcol = col/bs; 624 cidx = col%bs; 625 ridx = row%bs; 626 high = nrow; 627 low = 0; /* assume unsorted */ 628 while (high-low > 5) { 629 t = (low+high)/2; 630 if (rp[t] > bcol) high = t; 631 else low = t; 632 } 633 for ( i=low; i<high; i++ ) { 634 if (rp[i] > bcol) break; 635 if (rp[i] == bcol) { 636 *v++ = ap[bs2*i+bs*cidx+ridx]; 637 goto finished; 638 } 639 } 640 *v++ = zero; 641 finished:; 642 } 643 } 644 PetscFunctionReturn(0); 645 } 646 647 648 #undef __FUNC__ 649 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 650 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 651 { 652 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 653 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 654 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 655 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 656 Scalar *value = v; 657 MatScalar *ap,*aa=a->a,*bap; 658 659 PetscFunctionBegin; 660 if (roworiented) { 661 stepval = (n-1)*bs; 662 } else { 663 stepval = (m-1)*bs; 664 } 665 for ( k=0; k<m; k++ ) { /* loop over added rows */ 666 row = im[k]; 667 if (row < 0) continue; 668 #if defined(USE_PETSC_BOPT_g) 669 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 670 #endif 671 rp = aj + ai[row]; 672 ap = aa + bs2*ai[row]; 673 rmax = imax[row]; 674 nrow = ailen[row]; 675 low = 0; 676 for ( l=0; l<n; l++ ) { /* loop over added columns */ 677 if (in[l] < 0) continue; 678 #if defined(USE_PETSC_BOPT_g) 679 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 680 #endif 681 col = in[l]; 682 if (roworiented) { 683 value = v + k*(stepval+bs)*bs + l*bs; 684 } else { 685 value = v + l*(stepval+bs)*bs + k*bs; 686 } 687 if (!sorted) low = 0; high = nrow; 688 while (high-low > 7) { 689 t = (low+high)/2; 690 if (rp[t] > col) high = t; 691 else low = t; 692 } 693 for ( i=low; i<high; i++ ) { 694 if (rp[i] > col) break; 695 if (rp[i] == col) { 696 bap = ap + bs2*i; 697 if (roworiented) { 698 if (is == ADD_VALUES) { 699 for ( ii=0; ii<bs; ii++,value+=stepval ) { 700 for (jj=ii; jj<bs2; jj+=bs ) { 701 bap[jj] += *value++; 702 } 703 } 704 } else { 705 for ( ii=0; ii<bs; ii++,value+=stepval ) { 706 for (jj=ii; jj<bs2; jj+=bs ) { 707 bap[jj] = *value++; 708 } 709 } 710 } 711 } else { 712 if (is == ADD_VALUES) { 713 for ( ii=0; ii<bs; ii++,value+=stepval ) { 714 for (jj=0; jj<bs; jj++ ) { 715 *bap++ += *value++; 716 } 717 } 718 } else { 719 for ( ii=0; ii<bs; ii++,value+=stepval ) { 720 for (jj=0; jj<bs; jj++ ) { 721 *bap++ = *value++; 722 } 723 } 724 } 725 } 726 goto noinsert2; 727 } 728 } 729 if (nonew == 1) goto noinsert2; 730 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 731 if (nrow >= rmax) { 732 /* there is no extra room in row, therefore enlarge */ 733 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 734 MatScalar *new_a; 735 736 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 737 738 /* malloc new storage space */ 739 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 740 new_a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a); 741 new_j = (int *) (new_a + bs2*new_nz); 742 new_i = new_j + new_nz; 743 744 /* copy over old data into new slots */ 745 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 746 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 747 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 748 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 749 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 750 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar)); 751 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar)); 752 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar)); 753 /* free up old matrix storage */ 754 PetscFree(a->a); 755 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 756 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 757 a->singlemalloc = 1; 758 759 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 760 rmax = imax[row] = imax[row] + CHUNKSIZE; 761 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 762 a->maxnz += bs2*CHUNKSIZE; 763 a->reallocs++; 764 a->nz++; 765 } 766 N = nrow++ - 1; 767 /* shift up all the later entries in this row */ 768 for ( ii=N; ii>=i; ii-- ) { 769 rp[ii+1] = rp[ii]; 770 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); 771 } 772 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar)); 773 rp[i] = col; 774 bap = ap + bs2*i; 775 if (roworiented) { 776 for ( ii=0; ii<bs; ii++,value+=stepval) { 777 for (jj=ii; jj<bs2; jj+=bs ) { 778 bap[jj] = *value++; 779 } 780 } 781 } else { 782 for ( ii=0; ii<bs; ii++,value+=stepval ) { 783 for (jj=0; jj<bs; jj++ ) { 784 *bap++ = *value++; 785 } 786 } 787 } 788 noinsert2:; 789 low = i; 790 } 791 ailen[row] = nrow; 792 } 793 PetscFunctionReturn(0); 794 } 795 796 797 #undef __FUNC__ 798 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 799 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 800 { 801 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 802 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 803 int m = a->m,*ip, N, *ailen = a->ilen; 804 int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 805 MatScalar *aa = a->a, *ap; 806 807 PetscFunctionBegin; 808 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 809 810 if (m) rmax = ailen[0]; 811 for ( i=1; i<mbs; i++ ) { 812 /* move each row back by the amount of empty slots (fshift) before it*/ 813 fshift += imax[i-1] - ailen[i-1]; 814 rmax = PetscMax(rmax,ailen[i]); 815 if (fshift) { 816 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 817 N = ailen[i]; 818 for ( j=0; j<N; j++ ) { 819 ip[j-fshift] = ip[j]; 820 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar)); 821 } 822 } 823 ai[i] = ai[i-1] + ailen[i-1]; 824 } 825 if (mbs) { 826 fshift += imax[mbs-1] - ailen[mbs-1]; 827 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 828 } 829 /* reset ilen and imax for each row */ 830 for ( i=0; i<mbs; i++ ) { 831 ailen[i] = imax[i] = ai[i+1] - ai[i]; 832 } 833 a->nz = ai[mbs]; 834 835 /* diagonals may have moved, so kill the diagonal pointers */ 836 if (fshift && a->diag) { 837 PetscFree(a->diag); 838 PLogObjectMemory(A,-(m+1)*sizeof(int)); 839 a->diag = 0; 840 } 841 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 842 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 843 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 844 a->reallocs); 845 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 846 a->reallocs = 0; 847 A->info.nz_unneeded = (double)fshift*bs2; 848 849 PetscFunctionReturn(0); 850 } 851 852 853 854 /* 855 This function returns an array of flags which indicate the locations of contiguous 856 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 857 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 858 Assume: sizes should be long enough to hold all the values. 859 */ 860 #undef __FUNC__ 861 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 862 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 863 { 864 int i,j,k,row; 865 PetscTruth flg; 866 867 /* PetscFunctionBegin;*/ 868 for ( i=0,j=0; i<n; j++ ) { 869 row = idx[i]; 870 if (row%bs!=0) { /* Not the begining of a block */ 871 sizes[j] = 1; 872 i++; 873 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 874 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 875 i++; 876 } else { /* Begining of the block, so check if the complete block exists */ 877 flg = PETSC_TRUE; 878 for ( k=1; k<bs; k++ ) { 879 if (row+k != idx[i+k]) { /* break in the block */ 880 flg = PETSC_FALSE; 881 break; 882 } 883 } 884 if (flg == PETSC_TRUE) { /* No break in the bs */ 885 sizes[j] = bs; 886 i+= bs; 887 } else { 888 sizes[j] = 1; 889 i++; 890 } 891 } 892 } 893 *bs_max = j; 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNC__ 898 #define __FUNC__ "MatZeroRows_SeqBAIJ" 899 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 900 { 901 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 902 int ierr,i,j,k,count,is_n,*is_idx,*rows; 903 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 904 Scalar zero = 0.0; 905 MatScalar *aa; 906 907 PetscFunctionBegin; 908 /* Make a copy of the IS and sort it */ 909 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 910 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 911 912 /* allocate memory for rows,sizes */ 913 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int)); CHKPTRQ(rows); 914 sizes = rows + is_n; 915 916 /* initialize copy IS valurs to rows, and sort them */ 917 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 918 ierr = PetscSortInt(is_n,rows); CHKERRQ(ierr); 919 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max); CHKERRQ(ierr); 920 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 921 922 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 923 row = rows[j]; 924 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 925 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 926 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 927 if (sizes[i] == bs) { 928 if (diag) { 929 if (baij->ilen[row/bs] > 0) { 930 baij->ilen[row/bs] = 1; 931 baij->j[baij->i[row/bs]] = row/bs; 932 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar)); CHKERRQ(ierr); 933 } 934 /* Now insert all the diagoanl values for this bs */ 935 for ( k=0; k<bs; k++ ) { 936 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 937 } 938 } else { /* (!diag) */ 939 baij->ilen[row/bs] = 0; 940 } /* end (!diag) */ 941 } else { /* (sizes[i] != bs) */ 942 #if defined (USE_PETSC_DEBUG) 943 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 944 #endif 945 for ( k=0; k<count; k++ ) { 946 aa[0] = zero; 947 aa+=bs; 948 } 949 if (diag) { 950 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 951 } 952 } 953 } 954 955 PetscFree(rows); 956 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNC__ 961 #define __FUNC__ "MatSetValues_SeqBAIJ" 962 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 963 { 964 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 965 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 966 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 967 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 968 int ridx,cidx,bs2=a->bs2; 969 MatScalar *ap,value,*aa=a->a,*bap; 970 971 PetscFunctionBegin; 972 for ( k=0; k<m; k++ ) { /* loop over added rows */ 973 row = im[k]; brow = row/bs; 974 if (row < 0) continue; 975 #if defined(USE_PETSC_BOPT_g) 976 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 977 #endif 978 rp = aj + ai[brow]; 979 ap = aa + bs2*ai[brow]; 980 rmax = imax[brow]; 981 nrow = ailen[brow]; 982 low = 0; 983 for ( l=0; l<n; l++ ) { /* loop over added columns */ 984 if (in[l] < 0) continue; 985 #if defined(USE_PETSC_BOPT_g) 986 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 987 #endif 988 col = in[l]; bcol = col/bs; 989 ridx = row % bs; cidx = col % bs; 990 if (roworiented) { 991 value = v[l + k*n]; 992 } else { 993 value = v[k + l*m]; 994 } 995 if (!sorted) low = 0; high = nrow; 996 while (high-low > 7) { 997 t = (low+high)/2; 998 if (rp[t] > bcol) high = t; 999 else low = t; 1000 } 1001 for ( i=low; i<high; i++ ) { 1002 if (rp[i] > bcol) break; 1003 if (rp[i] == bcol) { 1004 bap = ap + bs2*i + bs*cidx + ridx; 1005 if (is == ADD_VALUES) *bap += value; 1006 else *bap = value; 1007 goto noinsert1; 1008 } 1009 } 1010 if (nonew == 1) goto noinsert1; 1011 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1012 if (nrow >= rmax) { 1013 /* there is no extra room in row, therefore enlarge */ 1014 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1015 MatScalar *new_a; 1016 1017 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1018 1019 /* Malloc new storage space */ 1020 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1021 new_a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a); 1022 new_j = (int *) (new_a + bs2*new_nz); 1023 new_i = new_j + new_nz; 1024 1025 /* copy over old data into new slots */ 1026 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1027 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1028 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 1029 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1030 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 1031 len*sizeof(int)); 1032 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar)); 1033 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar)); 1034 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 1035 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar)); 1036 /* free up old matrix storage */ 1037 PetscFree(a->a); 1038 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 1039 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1040 a->singlemalloc = 1; 1041 1042 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1043 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1044 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1045 a->maxnz += bs2*CHUNKSIZE; 1046 a->reallocs++; 1047 a->nz++; 1048 } 1049 N = nrow++ - 1; 1050 /* shift up all the later entries in this row */ 1051 for ( ii=N; ii>=i; ii-- ) { 1052 rp[ii+1] = rp[ii]; 1053 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); 1054 } 1055 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar)); 1056 rp[i] = bcol; 1057 ap[bs2*i + bs*cidx + ridx] = value; 1058 noinsert1:; 1059 low = i; 1060 } 1061 ailen[brow] = nrow; 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1067 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1068 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1069 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1070 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1071 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1072 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1073 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1074 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1075 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1076 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1077 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1078 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1079 extern int MatZeroEntries_SeqBAIJ(Mat); 1080 1081 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1082 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1083 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1084 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1085 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1086 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1087 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1088 1089 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1090 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1091 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1092 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1093 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1094 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1095 1096 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1097 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1098 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1099 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1100 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1101 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1102 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1103 1104 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1105 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1106 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1107 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1108 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1109 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1110 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1111 1112 #undef __FUNC__ 1113 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1114 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1117 Mat outA; 1118 int ierr; 1119 PetscTruth row_identity, col_identity; 1120 1121 PetscFunctionBegin; 1122 if (info && info->fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported"); 1123 ierr = ISIdentity(row,&row_identity); CHKERRQ(ierr); 1124 ierr = ISIdentity(col,&col_identity); CHKERRQ(ierr); 1125 if (!row_identity || !col_identity) { 1126 SETERRQ(1,1,"Row and column permutations must be identity"); 1127 } 1128 1129 outA = inA; 1130 inA->factor = FACTOR_LU; 1131 a->row = row; 1132 a->col = col; 1133 1134 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1135 ierr = ISInvertPermutation(col,&(a->icol)); CHKERRQ(ierr); 1136 PLogObjectParent(inA,a->icol); 1137 1138 if (!a->solve_work) { 1139 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1140 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1141 } 1142 1143 if (!a->diag) { 1144 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1145 } 1146 /* 1147 Blocksize 4 and 5 have a special faster factorization/solver for ILU(0) factorization 1148 with natural ordering 1149 */ 1150 if (a->bs == 4) { 1151 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1152 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1153 PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n"); 1154 } else if (a->bs == 5) { 1155 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1156 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1157 PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n"); 1158 } 1159 1160 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1161 1162 1163 PetscFunctionReturn(0); 1164 } 1165 #undef __FUNC__ 1166 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1167 int MatPrintHelp_SeqBAIJ(Mat A) 1168 { 1169 static int called = 0; 1170 MPI_Comm comm = A->comm; 1171 1172 PetscFunctionBegin; 1173 if (called) {PetscFunctionReturn(0);} else called = 1; 1174 (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1175 (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n"); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 EXTERN_C_BEGIN 1180 #undef __FUNC__ 1181 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1182 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1183 { 1184 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1185 int i,nz,n; 1186 1187 PetscFunctionBegin; 1188 nz = baij->maxnz; 1189 n = baij->n; 1190 for (i=0; i<nz; i++) { 1191 baij->j[i] = indices[i]; 1192 } 1193 baij->nz = nz; 1194 for ( i=0; i<n; i++ ) { 1195 baij->ilen[i] = baij->imax[i]; 1196 } 1197 1198 PetscFunctionReturn(0); 1199 } 1200 EXTERN_C_END 1201 1202 #undef __FUNC__ 1203 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1204 /*@ 1205 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1206 in the matrix. 1207 1208 Input Parameters: 1209 + mat - the SeqBAIJ matrix 1210 - indices - the column indices 1211 1212 Notes: 1213 This can be called if you have precomputed the nonzero structure of the 1214 matrix and want to provide it to the matrix object to improve the performance 1215 of the MatSetValues() operation. 1216 1217 You MUST have set the correct numbers of nonzeros per row in the call to 1218 MatCreateSeqBAIJ(). 1219 1220 MUST be called before any calls to MatSetValues(); 1221 1222 @*/ 1223 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1224 { 1225 int ierr,(*f)(Mat,int *); 1226 1227 PetscFunctionBegin; 1228 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1229 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f); 1230 CHKERRQ(ierr); 1231 if (f) { 1232 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1233 } else { 1234 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 /* -------------------------------------------------------------------*/ 1240 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1241 MatGetRow_SeqBAIJ, 1242 MatRestoreRow_SeqBAIJ, 1243 MatMult_SeqBAIJ_N, 1244 MatMultAdd_SeqBAIJ_N, 1245 MatMultTrans_SeqBAIJ, 1246 MatMultTransAdd_SeqBAIJ, 1247 MatSolve_SeqBAIJ_N, 1248 0, 1249 0, 1250 0, 1251 MatLUFactor_SeqBAIJ, 1252 0, 1253 0, 1254 MatTranspose_SeqBAIJ, 1255 MatGetInfo_SeqBAIJ, 1256 MatEqual_SeqBAIJ, 1257 MatGetDiagonal_SeqBAIJ, 1258 MatDiagonalScale_SeqBAIJ, 1259 MatNorm_SeqBAIJ, 1260 0, 1261 MatAssemblyEnd_SeqBAIJ, 1262 0, 1263 MatSetOption_SeqBAIJ, 1264 MatZeroEntries_SeqBAIJ, 1265 MatZeroRows_SeqBAIJ, 1266 MatLUFactorSymbolic_SeqBAIJ, 1267 MatLUFactorNumeric_SeqBAIJ_N, 1268 0, 1269 0, 1270 MatGetSize_SeqBAIJ, 1271 MatGetSize_SeqBAIJ, 1272 MatGetOwnershipRange_SeqBAIJ, 1273 MatILUFactorSymbolic_SeqBAIJ, 1274 0, 1275 0, 1276 0, 1277 MatDuplicate_SeqBAIJ, 1278 0, 1279 0, 1280 MatILUFactor_SeqBAIJ, 1281 0, 1282 0, 1283 MatGetSubMatrices_SeqBAIJ, 1284 MatIncreaseOverlap_SeqBAIJ, 1285 MatGetValues_SeqBAIJ, 1286 0, 1287 MatPrintHelp_SeqBAIJ, 1288 MatScale_SeqBAIJ, 1289 0, 1290 0, 1291 0, 1292 MatGetBlockSize_SeqBAIJ, 1293 MatGetRowIJ_SeqBAIJ, 1294 MatRestoreRowIJ_SeqBAIJ, 1295 0, 1296 0, 1297 0, 1298 0, 1299 0, 1300 0, 1301 MatSetValuesBlocked_SeqBAIJ, 1302 MatGetSubMatrix_SeqBAIJ, 1303 0, 1304 0, 1305 MatGetMaps_Petsc}; 1306 1307 #undef __FUNC__ 1308 #define __FUNC__ "MatCreateSeqBAIJ" 1309 /*@C 1310 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1311 compressed row) format. For good matrix assembly performance the 1312 user should preallocate the matrix storage by setting the parameter nz 1313 (or the array nzz). By setting these parameters accurately, performance 1314 during matrix assembly can be increased by more than a factor of 50. 1315 1316 Collective on MPI_Comm 1317 1318 Input Parameters: 1319 + comm - MPI communicator, set to PETSC_COMM_SELF 1320 . bs - size of block 1321 . m - number of rows 1322 . n - number of columns 1323 . nz - number of block nonzeros per block row (same for all rows) 1324 - nzz - array containing the number of block nonzeros in the various block rows 1325 (possibly different for each block row) or PETSC_NULL 1326 1327 Output Parameter: 1328 . A - the matrix 1329 1330 Options Database Keys: 1331 . -mat_no_unroll - uses code that does not unroll the loops in the 1332 block calculations (much slower) 1333 . -mat_block_size - size of the blocks to use 1334 1335 Notes: 1336 The block AIJ format is fully compatible with standard Fortran 77 1337 storage. That is, the stored row and column indices can begin at 1338 either one (as in Fortran) or zero. See the users' manual for details. 1339 1340 Specify the preallocated storage with either nz or nnz (not both). 1341 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1342 allocation. For additional details, see the users manual chapter on 1343 matrices. 1344 1345 Level: intermediate 1346 1347 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1348 @*/ 1349 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1350 { 1351 Mat B; 1352 Mat_SeqBAIJ *b; 1353 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 1354 1355 PetscFunctionBegin; 1356 MPI_Comm_size(comm,&size); 1357 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1358 1359 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1360 1361 if (mbs*bs!=m || nbs*bs!=n) { 1362 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1363 } 1364 1365 *A = 0; 1366 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1367 PLogObjectCreate(B); 1368 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1369 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1370 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1371 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1372 if (!flg) { 1373 switch (bs) { 1374 case 1: 1375 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1376 B->ops->solve = MatSolve_SeqBAIJ_1; 1377 B->ops->mult = MatMult_SeqBAIJ_1; 1378 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1379 break; 1380 case 2: 1381 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1382 B->ops->solve = MatSolve_SeqBAIJ_2; 1383 B->ops->mult = MatMult_SeqBAIJ_2; 1384 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1385 break; 1386 case 3: 1387 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1388 B->ops->solve = MatSolve_SeqBAIJ_3; 1389 B->ops->mult = MatMult_SeqBAIJ_3; 1390 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1391 break; 1392 case 4: 1393 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1394 B->ops->solve = MatSolve_SeqBAIJ_4; 1395 B->ops->mult = MatMult_SeqBAIJ_4; 1396 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1397 break; 1398 case 5: 1399 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1400 B->ops->solve = MatSolve_SeqBAIJ_5; 1401 B->ops->mult = MatMult_SeqBAIJ_5; 1402 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1403 break; 1404 case 7: 1405 B->ops->mult = MatMult_SeqBAIJ_7; 1406 B->ops->solve = MatSolve_SeqBAIJ_7; 1407 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1408 break; 1409 } 1410 } 1411 B->ops->destroy = MatDestroy_SeqBAIJ; 1412 B->ops->view = MatView_SeqBAIJ; 1413 B->factor = 0; 1414 B->lupivotthreshold = 1.0; 1415 B->mapping = 0; 1416 b->row = 0; 1417 b->col = 0; 1418 b->icol = 0; 1419 b->reallocs = 0; 1420 1421 b->m = m; B->m = m; B->M = m; 1422 b->n = n; B->n = n; B->N = n; 1423 1424 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1425 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1426 1427 b->mbs = mbs; 1428 b->nbs = nbs; 1429 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1430 if (nnz == PETSC_NULL) { 1431 if (nz == PETSC_DEFAULT) nz = 5; 1432 else if (nz <= 0) nz = 1; 1433 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1434 nz = nz*mbs; 1435 } else { 1436 nz = 0; 1437 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1438 } 1439 1440 /* allocate the matrix space */ 1441 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1442 b->a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1443 PetscMemzero(b->a,nz*bs2*sizeof(MatScalar)); 1444 b->j = (int *) (b->a + nz*bs2); 1445 PetscMemzero(b->j,nz*sizeof(int)); 1446 b->i = b->j + nz; 1447 b->singlemalloc = 1; 1448 1449 b->i[0] = 0; 1450 for (i=1; i<mbs+1; i++) { 1451 b->i[i] = b->i[i-1] + b->imax[i-1]; 1452 } 1453 1454 /* b->ilen will count nonzeros in each block row so far. */ 1455 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1456 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1457 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1458 1459 b->bs = bs; 1460 b->bs2 = bs2; 1461 b->mbs = mbs; 1462 b->nz = 0; 1463 b->maxnz = nz*bs2; 1464 b->sorted = 0; 1465 b->roworiented = 1; 1466 b->nonew = 0; 1467 b->diag = 0; 1468 b->solve_work = 0; 1469 b->mult_work = 0; 1470 b->spptr = 0; 1471 B->info.nz_unneeded = (double)b->maxnz; 1472 1473 *A = B; 1474 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1475 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1476 1477 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1478 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1479 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNC__ 1484 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1485 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1486 { 1487 Mat C; 1488 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1489 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1490 1491 PetscFunctionBegin; 1492 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1493 1494 *B = 0; 1495 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1496 PLogObjectCreate(C); 1497 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1498 PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps)); 1499 C->ops->destroy = MatDestroy_SeqBAIJ; 1500 C->ops->view = MatView_SeqBAIJ; 1501 C->factor = A->factor; 1502 c->row = 0; 1503 c->col = 0; 1504 c->icol = 0; 1505 C->assembled = PETSC_TRUE; 1506 1507 c->m = C->m = a->m; 1508 c->n = C->n = a->n; 1509 C->M = a->m; 1510 C->N = a->n; 1511 1512 c->bs = a->bs; 1513 c->bs2 = a->bs2; 1514 c->mbs = a->mbs; 1515 c->nbs = a->nbs; 1516 1517 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1518 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1519 for ( i=0; i<mbs; i++ ) { 1520 c->imax[i] = a->imax[i]; 1521 c->ilen[i] = a->ilen[i]; 1522 } 1523 1524 /* allocate the matrix space */ 1525 c->singlemalloc = 1; 1526 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1527 c->a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1528 c->j = (int *) (c->a + nz*bs2); 1529 c->i = c->j + nz; 1530 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1531 if (mbs > 0) { 1532 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1533 if (cpvalues == MAT_COPY_VALUES) { 1534 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar)); 1535 } else { 1536 PetscMemzero(c->a,bs2*nz*sizeof(MatScalar)); 1537 } 1538 } 1539 1540 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1541 c->sorted = a->sorted; 1542 c->roworiented = a->roworiented; 1543 c->nonew = a->nonew; 1544 1545 if (a->diag) { 1546 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1547 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1548 for ( i=0; i<mbs; i++ ) { 1549 c->diag[i] = a->diag[i]; 1550 } 1551 } else c->diag = 0; 1552 c->nz = a->nz; 1553 c->maxnz = a->maxnz; 1554 c->solve_work = 0; 1555 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1556 c->mult_work = 0; 1557 *B = C; 1558 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNC__ 1563 #define __FUNC__ "MatLoad_SeqBAIJ" 1564 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1565 { 1566 Mat_SeqBAIJ *a; 1567 Mat B; 1568 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1569 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1570 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1571 int *masked, nmask,tmp,bs2,ishift; 1572 Scalar *aa; 1573 MPI_Comm comm = ((PetscObject) viewer)->comm; 1574 1575 PetscFunctionBegin; 1576 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1577 bs2 = bs*bs; 1578 1579 MPI_Comm_size(comm,&size); 1580 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1581 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1582 ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr); 1583 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1584 M = header[1]; N = header[2]; nz = header[3]; 1585 1586 if (header[3] < 0) { 1587 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1588 } 1589 1590 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1591 1592 /* 1593 This code adds extra rows to make sure the number of rows is 1594 divisible by the blocksize 1595 */ 1596 mbs = M/bs; 1597 extra_rows = bs - M + bs*(mbs); 1598 if (extra_rows == bs) extra_rows = 0; 1599 else mbs++; 1600 if (extra_rows) { 1601 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1602 } 1603 1604 /* read in row lengths */ 1605 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1606 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1607 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1608 1609 /* read in column indices */ 1610 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1611 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr); 1612 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1613 1614 /* loop over row lengths determining block row lengths */ 1615 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1616 PetscMemzero(browlengths,mbs*sizeof(int)); 1617 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1618 PetscMemzero(mask,mbs*sizeof(int)); 1619 masked = mask + mbs; 1620 rowcount = 0; nzcount = 0; 1621 for ( i=0; i<mbs; i++ ) { 1622 nmask = 0; 1623 for ( j=0; j<bs; j++ ) { 1624 kmax = rowlengths[rowcount]; 1625 for ( k=0; k<kmax; k++ ) { 1626 tmp = jj[nzcount++]/bs; 1627 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1628 } 1629 rowcount++; 1630 } 1631 browlengths[i] += nmask; 1632 /* zero out the mask elements we set */ 1633 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1634 } 1635 1636 /* create our matrix */ 1637 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1638 B = *A; 1639 a = (Mat_SeqBAIJ *) B->data; 1640 1641 /* set matrix "i" values */ 1642 a->i[0] = 0; 1643 for ( i=1; i<= mbs; i++ ) { 1644 a->i[i] = a->i[i-1] + browlengths[i-1]; 1645 a->ilen[i-1] = browlengths[i-1]; 1646 } 1647 a->nz = 0; 1648 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1649 1650 /* read in nonzero values */ 1651 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1652 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr); 1653 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1654 1655 /* set "a" and "j" values into matrix */ 1656 nzcount = 0; jcount = 0; 1657 for ( i=0; i<mbs; i++ ) { 1658 nzcountb = nzcount; 1659 nmask = 0; 1660 for ( j=0; j<bs; j++ ) { 1661 kmax = rowlengths[i*bs+j]; 1662 for ( k=0; k<kmax; k++ ) { 1663 tmp = jj[nzcount++]/bs; 1664 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1665 } 1666 rowcount++; 1667 } 1668 /* sort the masked values */ 1669 PetscSortInt(nmask,masked); 1670 1671 /* set "j" values into matrix */ 1672 maskcount = 1; 1673 for ( j=0; j<nmask; j++ ) { 1674 a->j[jcount++] = masked[j]; 1675 mask[masked[j]] = maskcount++; 1676 } 1677 /* set "a" values into matrix */ 1678 ishift = bs2*a->i[i]; 1679 for ( j=0; j<bs; j++ ) { 1680 kmax = rowlengths[i*bs+j]; 1681 for ( k=0; k<kmax; k++ ) { 1682 tmp = jj[nzcountb]/bs ; 1683 block = mask[tmp] - 1; 1684 point = jj[nzcountb] - bs*tmp; 1685 idx = ishift + bs2*block + j + bs*point; 1686 a->a[idx] = aa[nzcountb++]; 1687 } 1688 } 1689 /* zero out the mask elements we set */ 1690 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1691 } 1692 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1693 1694 PetscFree(rowlengths); 1695 PetscFree(browlengths); 1696 PetscFree(aa); 1697 PetscFree(jj); 1698 PetscFree(mask); 1699 1700 B->assembled = PETSC_TRUE; 1701 1702 ierr = MatView_Private(B); CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 1707 1708