1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 #include "src/mat/impls/sbaij/seq/sbaij.h" 12 13 #define CHUNKSIZE 10 14 15 /* 16 Checks for missing diagonals 17 */ 18 #undef __FUNCT__ 19 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 20 int MatMissingDiagonal_SeqSBAIJ(Mat A) 21 { 22 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 23 int *diag,*jj = a->j,i,ierr; 24 25 PetscFunctionBegin; 26 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 27 diag = a->diag; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 31 } 32 } 33 PetscFunctionReturn(0); 34 } 35 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 38 int MatMarkDiagonal_SeqSBAIJ(Mat A) 39 { 40 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41 int i,j,*diag,m = a->mbs,ierr; 42 43 PetscFunctionBegin; 44 if (a->diag) PetscFunctionReturn(0); 45 46 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 47 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 48 for (i=0; i<m; i++) { 49 diag[i] = a->i[i+1]; 50 for (j=a->i[i]; j<a->i[i+1]; j++) { 51 if (a->j[j] == i) { 52 diag[i] = j; 53 break; 54 } 55 } 56 } 57 a->diag = diag; 58 PetscFunctionReturn(0); 59 } 60 61 62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 67 { 68 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 69 70 PetscFunctionBegin; 71 if (ia) { 72 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 73 } 74 *nn = a->mbs; 75 PetscFunctionReturn(0); 76 } 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 81 { 82 PetscFunctionBegin; 83 if (!ia) PetscFunctionReturn(0); 84 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 91 { 92 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 93 94 PetscFunctionBegin; 95 *bs = sbaij->bs; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 101 int MatDestroy_SeqSBAIJ(Mat A) 102 { 103 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 104 int ierr; 105 106 PetscFunctionBegin; 107 #if defined(PETSC_USE_LOG) 108 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 109 #endif 110 ierr = PetscFree(a->a);CHKERRQ(ierr); 111 if (!a->singlemalloc) { 112 ierr = PetscFree(a->i);CHKERRQ(ierr); 113 ierr = PetscFree(a->j);CHKERRQ(ierr); 114 } 115 if (a->row) { 116 ierr = ISDestroy(a->row);CHKERRQ(ierr); 117 } 118 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 119 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 120 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 121 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 122 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 125 126 if (a->inew){ 127 ierr = PetscFree(a->inew);CHKERRQ(ierr); 128 a->inew = 0; 129 } 130 ierr = PetscFree(a);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 136 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 137 { 138 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 139 140 PetscFunctionBegin; 141 switch (op) { 142 case MAT_ROW_ORIENTED: 143 a->roworiented = PETSC_TRUE; 144 break; 145 case MAT_COLUMN_ORIENTED: 146 a->roworiented = PETSC_FALSE; 147 break; 148 case MAT_COLUMNS_SORTED: 149 a->sorted = PETSC_TRUE; 150 break; 151 case MAT_COLUMNS_UNSORTED: 152 a->sorted = PETSC_FALSE; 153 break; 154 case MAT_KEEP_ZEROED_ROWS: 155 a->keepzeroedrows = PETSC_TRUE; 156 break; 157 case MAT_NO_NEW_NONZERO_LOCATIONS: 158 a->nonew = 1; 159 break; 160 case MAT_NEW_NONZERO_LOCATION_ERR: 161 a->nonew = -1; 162 break; 163 case MAT_NEW_NONZERO_ALLOCATION_ERR: 164 a->nonew = -2; 165 break; 166 case MAT_YES_NEW_NONZERO_LOCATIONS: 167 a->nonew = 0; 168 break; 169 case MAT_ROWS_SORTED: 170 case MAT_ROWS_UNSORTED: 171 case MAT_YES_NEW_DIAGONALS: 172 case MAT_IGNORE_OFF_PROC_ENTRIES: 173 case MAT_USE_HASH_TABLE: 174 case MAT_USE_SINGLE_PRECISION_SOLVES: 175 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 176 break; 177 case MAT_NO_NEW_DIAGONALS: 178 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 179 break; 180 default: 181 SETERRQ(PETSC_ERR_SUP,"unknown option"); 182 break; 183 } 184 PetscFunctionReturn(0); 185 } 186 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 190 { 191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 192 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 193 MatScalar *aa,*aa_i; 194 PetscScalar *v_i; 195 196 PetscFunctionBegin; 197 bs = a->bs; 198 ai = a->i; 199 aj = a->j; 200 aa = a->a; 201 bs2 = a->bs2; 202 203 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 204 205 bn = row/bs; /* Block number */ 206 bp = row % bs; /* Block position */ 207 M = ai[bn+1] - ai[bn]; 208 *ncols = bs*M; 209 210 if (v) { 211 *v = 0; 212 if (*ncols) { 213 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 214 for (i=0; i<M; i++) { /* for each block in the block row */ 215 v_i = *v + i*bs; 216 aa_i = aa + bs2*(ai[bn] + i); 217 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 218 } 219 } 220 } 221 222 if (cols) { 223 *cols = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 cols_i = *cols + i*bs; 228 itmp = bs*aj[ai[bn] + i]; 229 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 230 } 231 } 232 } 233 234 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 235 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 236 #ifdef column_search 237 v_i = *v + M*bs; 238 cols_i = *cols + M*bs; 239 for (i=0; i<bn; i++){ /* for each block row */ 240 M = ai[i+1] - ai[i]; 241 for (j=0; j<M; j++){ 242 itmp = aj[ai[i] + j]; /* block column value */ 243 if (itmp == bn){ 244 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 245 for (k=0; k<bs; k++) { 246 *cols_i++ = i*bs+k; 247 *v_i++ = aa_i[k]; 248 } 249 *ncols += bs; 250 break; 251 } 252 } 253 } 254 #endif 255 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 262 { 263 int ierr; 264 265 PetscFunctionBegin; 266 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 267 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 274 { 275 PetscFunctionBegin; 276 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 277 /* PetscFunctionReturn(0); */ 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 282 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 283 { 284 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 285 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 286 PetscScalar *aa; 287 FILE *file; 288 289 PetscFunctionBegin; 290 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 291 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 292 col_lens[0] = MAT_COOKIE; 293 294 col_lens[1] = A->m; 295 col_lens[2] = A->m; 296 col_lens[3] = a->s_nz*bs2; 297 298 /* store lengths of each row and write (including header) to file */ 299 count = 0; 300 for (i=0; i<a->mbs; i++) { 301 for (j=0; j<bs; j++) { 302 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 303 } 304 } 305 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 306 ierr = PetscFree(col_lens);CHKERRQ(ierr); 307 308 /* store column indices (zero start index) */ 309 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 310 count = 0; 311 for (i=0; i<a->mbs; i++) { 312 for (j=0; j<bs; j++) { 313 for (k=a->i[i]; k<a->i[i+1]; k++) { 314 for (l=0; l<bs; l++) { 315 jj[count++] = bs*a->j[k] + l; 316 } 317 } 318 } 319 } 320 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 321 ierr = PetscFree(jj);CHKERRQ(ierr); 322 323 /* store nonzero values */ 324 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 325 count = 0; 326 for (i=0; i<a->mbs; i++) { 327 for (j=0; j<bs; j++) { 328 for (k=a->i[i]; k<a->i[i+1]; k++) { 329 for (l=0; l<bs; l++) { 330 aa[count++] = a->a[bs2*k + l*bs + j]; 331 } 332 } 333 } 334 } 335 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 336 ierr = PetscFree(aa);CHKERRQ(ierr); 337 338 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 339 if (file) { 340 fprintf(file,"-matload_block_size %d\n",a->bs); 341 } 342 PetscFunctionReturn(0); 343 } 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 347 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 348 { 349 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 350 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 351 char *name; 352 PetscViewerFormat format; 353 354 PetscFunctionBegin; 355 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 356 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 357 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 358 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 359 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 360 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 361 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 362 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 363 for (i=0; i<a->mbs; i++) { 364 for (j=0; j<bs; j++) { 365 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 366 for (k=a->i[i]; k<a->i[i+1]; k++) { 367 for (l=0; l<bs; l++) { 368 #if defined(PETSC_USE_COMPLEX) 369 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 370 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 371 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 372 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 373 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 374 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 375 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 376 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } 378 #else 379 if (a->a[bs2*k + l*bs + j] != 0.0) { 380 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 381 } 382 #endif 383 } 384 } 385 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 386 } 387 } 388 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 389 } else { 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 391 for (i=0; i<a->mbs; i++) { 392 for (j=0; j<bs; j++) { 393 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 394 for (k=a->i[i]; k<a->i[i+1]; k++) { 395 for (l=0; l<bs; l++) { 396 #if defined(PETSC_USE_COMPLEX) 397 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 398 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 399 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 400 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 401 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 402 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 403 } else { 404 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } 406 #else 407 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 408 #endif 409 } 410 } 411 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 412 } 413 } 414 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 415 } 416 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 422 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 423 { 424 Mat A = (Mat) Aa; 425 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 426 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 427 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 428 MatScalar *aa; 429 MPI_Comm comm; 430 PetscViewer viewer; 431 432 PetscFunctionBegin; 433 /* 434 This is nasty. If this is called from an originally parallel matrix 435 then all processes call this,but only the first has the matrix so the 436 rest should return immediately. 437 */ 438 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 439 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 440 if (rank) PetscFunctionReturn(0); 441 442 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 443 444 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 445 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 446 447 /* loop over matrix elements drawing boxes */ 448 color = PETSC_DRAW_BLUE; 449 for (i=0,row=0; i<mbs; i++,row+=bs) { 450 for (j=a->i[i]; j<a->i[i+1]; j++) { 451 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 452 x_l = a->j[j]*bs; x_r = x_l + 1.0; 453 aa = a->a + j*bs2; 454 for (k=0; k<bs; k++) { 455 for (l=0; l<bs; l++) { 456 if (PetscRealPart(*aa++) >= 0.) continue; 457 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 458 } 459 } 460 } 461 } 462 color = PETSC_DRAW_CYAN; 463 for (i=0,row=0; i<mbs; i++,row+=bs) { 464 for (j=a->i[i]; j<a->i[i+1]; j++) { 465 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 466 x_l = a->j[j]*bs; x_r = x_l + 1.0; 467 aa = a->a + j*bs2; 468 for (k=0; k<bs; k++) { 469 for (l=0; l<bs; l++) { 470 if (PetscRealPart(*aa++) != 0.) continue; 471 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 472 } 473 } 474 } 475 } 476 477 color = PETSC_DRAW_RED; 478 for (i=0,row=0; i<mbs; i++,row+=bs) { 479 for (j=a->i[i]; j<a->i[i+1]; j++) { 480 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 481 x_l = a->j[j]*bs; x_r = x_l + 1.0; 482 aa = a->a + j*bs2; 483 for (k=0; k<bs; k++) { 484 for (l=0; l<bs; l++) { 485 if (PetscRealPart(*aa++) <= 0.) continue; 486 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 487 } 488 } 489 } 490 } 491 PetscFunctionReturn(0); 492 } 493 494 #undef __FUNCT__ 495 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 496 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 497 { 498 int ierr; 499 PetscReal xl,yl,xr,yr,w,h; 500 PetscDraw draw; 501 PetscTruth isnull; 502 503 PetscFunctionBegin; 504 505 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 506 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 507 508 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 509 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 510 xr += w; yr += h; xl = -w; yl = -h; 511 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 512 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 513 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "MatView_SeqSBAIJ" 519 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 520 { 521 int ierr; 522 PetscTruth issocket,isascii,isbinary,isdraw; 523 524 PetscFunctionBegin; 525 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 526 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 529 if (issocket) { 530 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 531 } else if (isascii){ 532 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 533 } else if (isbinary) { 534 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 535 } else if (isdraw) { 536 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 537 } else { 538 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 539 } 540 PetscFunctionReturn(0); 541 } 542 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 546 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 547 { 548 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 549 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 550 int *ai = a->i,*ailen = a->ilen; 551 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 552 MatScalar *ap,*aa = a->a,zero = 0.0; 553 554 PetscFunctionBegin; 555 for (k=0; k<m; k++) { /* loop over rows */ 556 row = im[k]; brow = row/bs; 557 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 558 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 559 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 560 nrow = ailen[brow]; 561 for (l=0; l<n; l++) { /* loop over columns */ 562 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 563 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 564 col = in[l] ; 565 bcol = col/bs; 566 cidx = col%bs; 567 ridx = row%bs; 568 high = nrow; 569 low = 0; /* assume unsorted */ 570 while (high-low > 5) { 571 t = (low+high)/2; 572 if (rp[t] > bcol) high = t; 573 else low = t; 574 } 575 for (i=low; i<high; i++) { 576 if (rp[i] > bcol) break; 577 if (rp[i] == bcol) { 578 *v++ = ap[bs2*i+bs*cidx+ridx]; 579 goto finished; 580 } 581 } 582 *v++ = zero; 583 finished:; 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 592 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 593 { 594 PetscFunctionBegin; 595 SETERRQ(1,"Function not yet written for SBAIJ format"); 596 /* PetscFunctionReturn(0); */ 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 601 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 602 { 603 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 604 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 605 int m = A->m,*ip,N,*ailen = a->ilen; 606 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 607 MatScalar *aa = a->a,*ap; 608 609 PetscFunctionBegin; 610 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 611 612 if (m) rmax = ailen[0]; 613 for (i=1; i<mbs; i++) { 614 /* move each row back by the amount of empty slots (fshift) before it*/ 615 fshift += imax[i-1] - ailen[i-1]; 616 rmax = PetscMax(rmax,ailen[i]); 617 if (fshift) { 618 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 619 N = ailen[i]; 620 for (j=0; j<N; j++) { 621 ip[j-fshift] = ip[j]; 622 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 623 } 624 } 625 ai[i] = ai[i-1] + ailen[i-1]; 626 } 627 if (mbs) { 628 fshift += imax[mbs-1] - ailen[mbs-1]; 629 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 630 } 631 /* reset ilen and imax for each row */ 632 for (i=0; i<mbs; i++) { 633 ailen[i] = imax[i] = ai[i+1] - ai[i]; 634 } 635 a->s_nz = ai[mbs]; 636 637 /* diagonals may have moved, so kill the diagonal pointers */ 638 if (fshift && a->diag) { 639 ierr = PetscFree(a->diag);CHKERRQ(ierr); 640 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 641 a->diag = 0; 642 } 643 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 644 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 645 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 646 a->reallocs); 647 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 648 a->reallocs = 0; 649 A->info.nz_unneeded = (PetscReal)fshift*bs2; 650 651 PetscFunctionReturn(0); 652 } 653 654 /* 655 This function returns an array of flags which indicate the locations of contiguous 656 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 657 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 658 Assume: sizes should be long enough to hold all the values. 659 */ 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 662 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 663 { 664 int i,j,k,row; 665 PetscTruth flg; 666 667 PetscFunctionBegin; 668 for (i=0,j=0; i<n; j++) { 669 row = idx[i]; 670 if (row%bs!=0) { /* Not the begining of a block */ 671 sizes[j] = 1; 672 i++; 673 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 674 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 675 i++; 676 } else { /* Begining of the block, so check if the complete block exists */ 677 flg = PETSC_TRUE; 678 for (k=1; k<bs; k++) { 679 if (row+k != idx[i+k]) { /* break in the block */ 680 flg = PETSC_FALSE; 681 break; 682 } 683 } 684 if (flg == PETSC_TRUE) { /* No break in the bs */ 685 sizes[j] = bs; 686 i+= bs; 687 } else { 688 sizes[j] = 1; 689 i++; 690 } 691 } 692 } 693 *bs_max = j; 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 699 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 700 { 701 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 702 int ierr,i,j,k,count,is_n,*is_idx,*rows; 703 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 704 PetscScalar zero = 0.0; 705 MatScalar *aa; 706 707 PetscFunctionBegin; 708 /* Make a copy of the IS and sort it */ 709 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 710 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 711 712 /* allocate memory for rows,sizes */ 713 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 714 sizes = rows + is_n; 715 716 /* initialize copy IS values to rows, and sort them */ 717 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 718 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 719 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 720 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 721 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 722 } else { 723 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 724 } 725 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 726 727 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 728 row = rows[j]; /* row to be zeroed */ 729 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 730 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 731 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 732 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 733 if (diag) { 734 if (sbaij->ilen[row/bs] > 0) { 735 sbaij->ilen[row/bs] = 1; 736 sbaij->j[sbaij->i[row/bs]] = row/bs; 737 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 738 } 739 /* Now insert all the diagoanl values for this bs */ 740 for (k=0; k<bs; k++) { 741 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 742 } 743 } else { /* (!diag) */ 744 sbaij->ilen[row/bs] = 0; 745 } /* end (!diag) */ 746 } else { /* (sizes[i] != bs), broken block */ 747 #if defined (PETSC_USE_DEBUG) 748 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 749 #endif 750 for (k=0; k<count; k++) { 751 aa[0] = zero; 752 aa+=bs; 753 } 754 if (diag) { 755 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 756 } 757 } 758 } 759 760 ierr = PetscFree(rows);CHKERRQ(ierr); 761 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 762 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 /* Only add/insert a(i,j) with i<=j (blocks). 767 Any a(i,j) with i>j input by user is ingored. 768 */ 769 770 #undef __FUNCT__ 771 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 772 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 773 { 774 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 775 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 776 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 777 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 778 int ridx,cidx,bs2=a->bs2,ierr; 779 MatScalar *ap,value,*aa=a->a,*bap; 780 781 PetscFunctionBegin; 782 783 for (k=0; k<m; k++) { /* loop over added rows */ 784 row = im[k]; /* row number */ 785 brow = row/bs; /* block row number */ 786 if (row < 0) continue; 787 #if defined(PETSC_USE_BOPT_g) 788 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 789 #endif 790 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 791 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 792 rmax = imax[brow]; /* maximum space allocated for this row */ 793 nrow = ailen[brow]; /* actual length of this row */ 794 low = 0; 795 796 for (l=0; l<n; l++) { /* loop over added columns */ 797 if (in[l] < 0) continue; 798 #if defined(PETSC_USE_BOPT_g) 799 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 800 #endif 801 col = in[l]; 802 bcol = col/bs; /* block col number */ 803 804 if (brow <= bcol){ 805 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 806 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 807 /* element value a(k,l) */ 808 if (roworiented) { 809 value = v[l + k*n]; 810 } else { 811 value = v[k + l*m]; 812 } 813 814 /* move pointer bap to a(k,l) quickly and add/insert value */ 815 if (!sorted) low = 0; high = nrow; 816 while (high-low > 7) { 817 t = (low+high)/2; 818 if (rp[t] > bcol) high = t; 819 else low = t; 820 } 821 for (i=low; i<high; i++) { 822 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 823 if (rp[i] > bcol) break; 824 if (rp[i] == bcol) { 825 bap = ap + bs2*i + bs*cidx + ridx; 826 if (is == ADD_VALUES) *bap += value; 827 else *bap = value; 828 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 829 if (brow == bcol && ridx < cidx){ 830 bap = ap + bs2*i + bs*ridx + cidx; 831 if (is == ADD_VALUES) *bap += value; 832 else *bap = value; 833 } 834 goto noinsert1; 835 } 836 } 837 838 if (nonew == 1) goto noinsert1; 839 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 840 if (nrow >= rmax) { 841 /* there is no extra room in row, therefore enlarge */ 842 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 843 MatScalar *new_a; 844 845 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 846 847 /* Malloc new storage space */ 848 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 849 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 850 new_j = (int*)(new_a + bs2*new_nz); 851 new_i = new_j + new_nz; 852 853 /* copy over old data into new slots */ 854 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 855 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 856 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 857 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 858 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 859 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 860 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 861 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 862 /* free up old matrix storage */ 863 ierr = PetscFree(a->a);CHKERRQ(ierr); 864 if (!a->singlemalloc) { 865 ierr = PetscFree(a->i);CHKERRQ(ierr); 866 ierr = PetscFree(a->j);CHKERRQ(ierr); 867 } 868 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 869 a->singlemalloc = PETSC_TRUE; 870 871 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 872 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 873 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 874 a->s_maxnz += bs2*CHUNKSIZE; 875 a->reallocs++; 876 a->s_nz++; 877 } 878 879 N = nrow++ - 1; 880 /* shift up all the later entries in this row */ 881 for (ii=N; ii>=i; ii--) { 882 rp[ii+1] = rp[ii]; 883 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 884 } 885 if (N>=i) { 886 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 887 } 888 rp[i] = bcol; 889 ap[bs2*i + bs*cidx + ridx] = value; 890 noinsert1:; 891 low = i; 892 /* } */ 893 } 894 } /* end of if .. if.. */ 895 } /* end of loop over added columns */ 896 ailen[brow] = nrow; 897 } /* end of loop over added rows */ 898 899 PetscFunctionReturn(0); 900 } 901 902 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 903 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 904 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 905 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 906 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 907 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 908 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 909 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 910 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 911 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 912 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 913 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 914 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 915 extern int MatZeroEntries_SeqSBAIJ(Mat); 916 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 917 918 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 919 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 920 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 921 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 922 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 923 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 924 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 925 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 926 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 927 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 928 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 929 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 930 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 931 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 932 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 933 934 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 935 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 936 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 937 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 938 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 939 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 940 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 941 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 942 943 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 944 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 945 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 946 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 947 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 948 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 949 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 950 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 951 952 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 953 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 954 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 955 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 956 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 957 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 958 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 959 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 960 961 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 962 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 963 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 964 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 965 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 966 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 967 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 968 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 969 970 #ifdef HAVE_MatICCFactor 971 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 974 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 975 { 976 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 977 Mat outA; 978 int ierr; 979 PetscTruth row_identity,col_identity; 980 981 PetscFunctionBegin; 982 /* 983 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 984 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 985 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 986 if (!row_identity || !col_identity) { 987 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 988 } 989 */ 990 991 outA = inA; 992 inA->factor = FACTOR_CHOLESKY; 993 994 if (!a->diag) { 995 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 996 } 997 /* 998 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 999 for ILU(0) factorization with natural ordering 1000 */ 1001 switch (a->bs) { 1002 case 1: 1003 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1004 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1005 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1006 case 2: 1007 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1008 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1009 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1010 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1011 break; 1012 case 3: 1013 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1014 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1015 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1016 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1017 break; 1018 case 4: 1019 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1020 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1021 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1022 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1023 break; 1024 case 5: 1025 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1026 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1027 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1028 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1029 break; 1030 case 6: 1031 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1032 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1033 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1034 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1035 break; 1036 case 7: 1037 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1038 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1039 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1040 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1041 break; 1042 default: 1043 a->row = row; 1044 a->icol = col; 1045 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1046 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1047 1048 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1049 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1050 PetscLogObjectParent(inA,a->icol); 1051 1052 if (!a->solve_work) { 1053 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1054 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1055 } 1056 } 1057 1058 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1059 1060 PetscFunctionReturn(0); 1061 } 1062 #endif 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1066 int MatPrintHelp_SeqSBAIJ(Mat A) 1067 { 1068 static PetscTruth called = PETSC_FALSE; 1069 MPI_Comm comm = A->comm; 1070 int ierr; 1071 1072 PetscFunctionBegin; 1073 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1074 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1075 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 EXTERN_C_BEGIN 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1082 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1083 { 1084 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1085 int i,nz,n; 1086 1087 PetscFunctionBegin; 1088 nz = baij->s_maxnz; 1089 n = mat->n; 1090 for (i=0; i<nz; i++) { 1091 baij->j[i] = indices[i]; 1092 } 1093 baij->s_nz = nz; 1094 for (i=0; i<n; i++) { 1095 baij->ilen[i] = baij->imax[i]; 1096 } 1097 1098 PetscFunctionReturn(0); 1099 } 1100 EXTERN_C_END 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1104 /*@ 1105 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1106 in the matrix. 1107 1108 Input Parameters: 1109 + mat - the SeqSBAIJ matrix 1110 - indices - the column indices 1111 1112 Level: advanced 1113 1114 Notes: 1115 This can be called if you have precomputed the nonzero structure of the 1116 matrix and want to provide it to the matrix object to improve the performance 1117 of the MatSetValues() operation. 1118 1119 You MUST have set the correct numbers of nonzeros per row in the call to 1120 MatCreateSeqSBAIJ(). 1121 1122 MUST be called before any calls to MatSetValues() 1123 1124 .seealso: MatCreateSeqSBAIJ 1125 @*/ 1126 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1127 { 1128 int ierr,(*f)(Mat,int *); 1129 1130 PetscFunctionBegin; 1131 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1132 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1133 if (f) { 1134 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1135 } else { 1136 SETERRQ(1,"Wrong type of matrix to set column indices"); 1137 } 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1143 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1144 { 1145 int ierr; 1146 1147 PetscFunctionBegin; 1148 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 /* -------------------------------------------------------------------*/ 1153 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1154 MatGetRow_SeqSBAIJ, 1155 MatRestoreRow_SeqSBAIJ, 1156 MatMult_SeqSBAIJ_N, 1157 MatMultAdd_SeqSBAIJ_N, 1158 MatMultTranspose_SeqSBAIJ, 1159 MatMultTransposeAdd_SeqSBAIJ, 1160 MatSolve_SeqSBAIJ_N, 1161 0, 1162 0, 1163 0, 1164 0, 1165 MatCholeskyFactor_SeqSBAIJ, 1166 0, 1167 MatTranspose_SeqSBAIJ, 1168 MatGetInfo_SeqSBAIJ, 1169 MatEqual_SeqSBAIJ, 1170 MatGetDiagonal_SeqSBAIJ, 1171 MatDiagonalScale_SeqSBAIJ, 1172 MatNorm_SeqSBAIJ, 1173 0, 1174 MatAssemblyEnd_SeqSBAIJ, 1175 0, 1176 MatSetOption_SeqSBAIJ, 1177 MatZeroEntries_SeqSBAIJ, 1178 MatZeroRows_SeqSBAIJ, 1179 0, 1180 0, 1181 MatCholeskyFactorSymbolic_SeqSBAIJ, 1182 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1183 MatSetUpPreallocation_SeqSBAIJ, 1184 0, 1185 0, 1186 MatICCFactorSymbolic_SeqSBAIJ, 1187 0, 1188 0, 1189 MatDuplicate_SeqSBAIJ, 1190 0, 1191 0, 1192 0, 1193 0, 1194 0, 1195 MatGetSubMatrices_SeqSBAIJ, 1196 MatIncreaseOverlap_SeqSBAIJ, 1197 MatGetValues_SeqSBAIJ, 1198 0, 1199 MatPrintHelp_SeqSBAIJ, 1200 MatScale_SeqSBAIJ, 1201 0, 1202 0, 1203 0, 1204 MatGetBlockSize_SeqSBAIJ, 1205 MatGetRowIJ_SeqSBAIJ, 1206 MatRestoreRowIJ_SeqSBAIJ, 1207 0, 1208 0, 1209 0, 1210 0, 1211 0, 1212 0, 1213 MatSetValuesBlocked_SeqSBAIJ, 1214 MatGetSubMatrix_SeqSBAIJ, 1215 0, 1216 0, 1217 MatGetPetscMaps_Petsc, 1218 0, 1219 0, 1220 0, 1221 0, 1222 0, 1223 0, 1224 MatGetRowMax_SeqSBAIJ}; 1225 1226 EXTERN_C_BEGIN 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1229 int MatStoreValues_SeqSBAIJ(Mat mat) 1230 { 1231 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1232 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1233 int ierr; 1234 1235 PetscFunctionBegin; 1236 if (aij->nonew != 1) { 1237 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1238 } 1239 1240 /* allocate space for values if not already there */ 1241 if (!aij->saved_values) { 1242 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1243 } 1244 1245 /* copy values over */ 1246 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 EXTERN_C_END 1250 1251 EXTERN_C_BEGIN 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1254 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1255 { 1256 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1257 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1258 1259 PetscFunctionBegin; 1260 if (aij->nonew != 1) { 1261 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1262 } 1263 if (!aij->saved_values) { 1264 SETERRQ(1,"Must call MatStoreValues(A);first"); 1265 } 1266 1267 /* copy values over */ 1268 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271 EXTERN_C_END 1272 1273 EXTERN_C_BEGIN 1274 #undef __FUNCT__ 1275 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1276 int MatCreate_SeqSBAIJ(Mat B) 1277 { 1278 Mat_SeqSBAIJ *b; 1279 int ierr,size; 1280 1281 PetscFunctionBegin; 1282 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1283 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1284 B->m = B->M = PetscMax(B->m,B->M); 1285 B->n = B->N = PetscMax(B->n,B->N); 1286 1287 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1288 B->data = (void*)b; 1289 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1290 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1291 B->ops->destroy = MatDestroy_SeqSBAIJ; 1292 B->ops->view = MatView_SeqSBAIJ; 1293 B->factor = 0; 1294 B->lupivotthreshold = 1.0; 1295 B->mapping = 0; 1296 b->row = 0; 1297 b->icol = 0; 1298 b->reallocs = 0; 1299 b->saved_values = 0; 1300 1301 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1302 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1303 1304 b->sorted = PETSC_FALSE; 1305 b->roworiented = PETSC_TRUE; 1306 b->nonew = 0; 1307 b->diag = 0; 1308 b->solve_work = 0; 1309 b->mult_work = 0; 1310 b->spptr = 0; 1311 b->keepzeroedrows = PETSC_FALSE; 1312 1313 b->inew = 0; 1314 b->jnew = 0; 1315 b->anew = 0; 1316 b->a2anew = 0; 1317 b->permute = PETSC_FALSE; 1318 1319 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1320 "MatStoreValues_SeqSBAIJ", 1321 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1322 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1323 "MatRetrieveValues_SeqSBAIJ", 1324 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1325 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1326 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1327 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 EXTERN_C_END 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1334 /*@C 1335 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1336 compressed row) format. For good matrix assembly performance the 1337 user should preallocate the matrix storage by setting the parameter nz 1338 (or the array nnz). By setting these parameters accurately, performance 1339 during matrix assembly can be increased by more than a factor of 50. 1340 1341 Collective on Mat 1342 1343 Input Parameters: 1344 + A - the symmetric matrix 1345 . bs - size of block 1346 . nz - number of block nonzeros per block row (same for all rows) 1347 - nnz - array containing the number of block nonzeros in the various block rows 1348 (possibly different for each block row) or PETSC_NULL 1349 1350 Options Database Keys: 1351 . -mat_no_unroll - uses code that does not unroll the loops in the 1352 block calculations (much slower) 1353 . -mat_block_size - size of the blocks to use 1354 1355 Level: intermediate 1356 1357 Notes: 1358 The block AIJ format is compatible with standard Fortran 77 1359 storage. That is, the stored row and column indices can begin at 1360 either one (as in Fortran) or zero. See the users' manual for details. 1361 1362 Specify the preallocated storage with either nz or nnz (not both). 1363 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1364 allocation. For additional details, see the users manual chapter on 1365 matrices. 1366 1367 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1368 @*/ 1369 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1370 { 1371 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1372 int i,len,ierr,mbs,bs2; 1373 PetscTruth flg; 1374 int s_nz; 1375 1376 PetscFunctionBegin; 1377 B->preallocated = PETSC_TRUE; 1378 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1379 mbs = B->m/bs; 1380 bs2 = bs*bs; 1381 1382 if (mbs*bs != B->m) { 1383 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1384 } 1385 1386 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1387 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1388 if (nnz) { 1389 for (i=0; i<mbs; i++) { 1390 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1391 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1392 } 1393 } 1394 1395 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1396 if (!flg) { 1397 switch (bs) { 1398 case 1: 1399 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1400 B->ops->solve = MatSolve_SeqSBAIJ_1; 1401 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1402 B->ops->mult = MatMult_SeqSBAIJ_1; 1403 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1404 break; 1405 case 2: 1406 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1407 B->ops->solve = MatSolve_SeqSBAIJ_2; 1408 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1409 B->ops->mult = MatMult_SeqSBAIJ_2; 1410 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1411 break; 1412 case 3: 1413 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1414 B->ops->solve = MatSolve_SeqSBAIJ_3; 1415 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1416 B->ops->mult = MatMult_SeqSBAIJ_3; 1417 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1418 break; 1419 case 4: 1420 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1421 B->ops->solve = MatSolve_SeqSBAIJ_4; 1422 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1423 B->ops->mult = MatMult_SeqSBAIJ_4; 1424 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1425 break; 1426 case 5: 1427 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1428 B->ops->solve = MatSolve_SeqSBAIJ_5; 1429 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1430 B->ops->mult = MatMult_SeqSBAIJ_5; 1431 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1432 break; 1433 case 6: 1434 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1435 B->ops->solve = MatSolve_SeqSBAIJ_6; 1436 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1437 B->ops->mult = MatMult_SeqSBAIJ_6; 1438 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1439 break; 1440 case 7: 1441 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1442 B->ops->solve = MatSolve_SeqSBAIJ_7; 1443 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1444 B->ops->mult = MatMult_SeqSBAIJ_7; 1445 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1446 break; 1447 } 1448 } 1449 1450 b->mbs = mbs; 1451 b->nbs = mbs; 1452 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1453 if (!nnz) { 1454 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1455 else if (nz <= 0) nz = 1; 1456 for (i=0; i<mbs; i++) { 1457 b->imax[i] = nz; 1458 } 1459 nz = nz*mbs; /* total nz */ 1460 } else { 1461 nz = 0; 1462 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1463 } 1464 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1465 s_nz = nz; 1466 1467 /* allocate the matrix space */ 1468 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1469 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1470 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1471 b->j = (int*)(b->a + s_nz*bs2); 1472 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1473 b->i = b->j + s_nz; 1474 b->singlemalloc = PETSC_TRUE; 1475 1476 /* pointer to beginning of each row */ 1477 b->i[0] = 0; 1478 for (i=1; i<mbs+1; i++) { 1479 b->i[i] = b->i[i-1] + b->imax[i-1]; 1480 } 1481 1482 /* b->ilen will count nonzeros in each block row so far. */ 1483 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1484 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1485 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1486 1487 b->bs = bs; 1488 b->bs2 = bs2; 1489 b->s_nz = 0; 1490 b->s_maxnz = s_nz*bs2; 1491 1492 b->inew = 0; 1493 b->jnew = 0; 1494 b->anew = 0; 1495 b->a2anew = 0; 1496 b->permute = PETSC_FALSE; 1497 PetscFunctionReturn(0); 1498 } 1499 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatCreateSeqSBAIJ" 1503 /*@C 1504 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1505 compressed row) format. For good matrix assembly performance the 1506 user should preallocate the matrix storage by setting the parameter nz 1507 (or the array nnz). By setting these parameters accurately, performance 1508 during matrix assembly can be increased by more than a factor of 50. 1509 1510 Collective on MPI_Comm 1511 1512 Input Parameters: 1513 + comm - MPI communicator, set to PETSC_COMM_SELF 1514 . bs - size of block 1515 . m - number of rows, or number of columns 1516 . nz - number of block nonzeros per block row (same for all rows) 1517 - nnz - array containing the number of block nonzeros in the various block rows 1518 (possibly different for each block row) or PETSC_NULL 1519 1520 Output Parameter: 1521 . A - the symmetric matrix 1522 1523 Options Database Keys: 1524 . -mat_no_unroll - uses code that does not unroll the loops in the 1525 block calculations (much slower) 1526 . -mat_block_size - size of the blocks to use 1527 1528 Level: intermediate 1529 1530 Notes: 1531 The block AIJ format is compatible with standard Fortran 77 1532 storage. That is, the stored row and column indices can begin at 1533 either one (as in Fortran) or zero. See the users' manual for details. 1534 1535 Specify the preallocated storage with either nz or nnz (not both). 1536 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1537 allocation. For additional details, see the users manual chapter on 1538 matrices. 1539 1540 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1541 @*/ 1542 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1543 { 1544 int ierr; 1545 1546 PetscFunctionBegin; 1547 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1548 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1549 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1550 PetscFunctionReturn(0); 1551 } 1552 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1555 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1556 { 1557 Mat C; 1558 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1559 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1560 1561 PetscFunctionBegin; 1562 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1563 1564 *B = 0; 1565 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1566 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1567 c = (Mat_SeqSBAIJ*)C->data; 1568 1569 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1570 C->preallocated = PETSC_TRUE; 1571 C->factor = A->factor; 1572 c->row = 0; 1573 c->icol = 0; 1574 c->saved_values = 0; 1575 c->keepzeroedrows = a->keepzeroedrows; 1576 C->assembled = PETSC_TRUE; 1577 1578 c->bs = a->bs; 1579 c->bs2 = a->bs2; 1580 c->mbs = a->mbs; 1581 c->nbs = a->nbs; 1582 1583 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1584 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1585 for (i=0; i<mbs; i++) { 1586 c->imax[i] = a->imax[i]; 1587 c->ilen[i] = a->ilen[i]; 1588 } 1589 1590 /* allocate the matrix space */ 1591 c->singlemalloc = PETSC_TRUE; 1592 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1593 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1594 c->j = (int*)(c->a + nz*bs2); 1595 c->i = c->j + nz; 1596 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1597 if (mbs > 0) { 1598 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1599 if (cpvalues == MAT_COPY_VALUES) { 1600 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1601 } else { 1602 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1603 } 1604 } 1605 1606 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1607 c->sorted = a->sorted; 1608 c->roworiented = a->roworiented; 1609 c->nonew = a->nonew; 1610 1611 if (a->diag) { 1612 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1613 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1614 for (i=0; i<mbs; i++) { 1615 c->diag[i] = a->diag[i]; 1616 } 1617 } else c->diag = 0; 1618 c->s_nz = a->s_nz; 1619 c->s_maxnz = a->s_maxnz; 1620 c->solve_work = 0; 1621 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1622 c->mult_work = 0; 1623 *B = C; 1624 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1625 PetscFunctionReturn(0); 1626 } 1627 1628 EXTERN_C_BEGIN 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1631 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1632 { 1633 Mat_SeqSBAIJ *a; 1634 Mat B; 1635 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1636 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount; 1637 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1638 int *masked,nmask,tmp,bs2,ishift; 1639 PetscScalar *aa; 1640 MPI_Comm comm = ((PetscObject)viewer)->comm; 1641 1642 PetscFunctionBegin; 1643 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1644 bs2 = bs*bs; 1645 1646 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1647 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1648 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1649 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1650 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1651 M = header[1]; N = header[2]; nz = header[3]; 1652 1653 if (header[3] < 0) { 1654 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1655 } 1656 1657 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1658 1659 /* 1660 This code adds extra rows to make sure the number of rows is 1661 divisible by the blocksize 1662 */ 1663 mbs = M/bs; 1664 extra_rows = bs - M + bs*(mbs); 1665 if (extra_rows == bs) extra_rows = 0; 1666 else mbs++; 1667 if (extra_rows) { 1668 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1669 } 1670 1671 /* read in row lengths */ 1672 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1673 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1674 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1675 1676 /* read in column indices */ 1677 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1678 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1679 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1680 1681 /* loop over row lengths determining block row lengths */ 1682 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1683 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1684 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1685 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1686 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1687 masked = mask + mbs; 1688 rowcount = 0; nzcount = 0; 1689 for (i=0; i<mbs; i++) { 1690 nmask = 0; 1691 for (j=0; j<bs; j++) { 1692 kmax = rowlengths[rowcount]; 1693 for (k=0; k<kmax; k++) { 1694 tmp = jj[nzcount++]/bs; /* block col. index */ 1695 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1696 } 1697 rowcount++; 1698 } 1699 s_browlengths[i] += nmask; 1700 browlengths[i] = 2*s_browlengths[i]; 1701 1702 /* zero out the mask elements we set */ 1703 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1704 } 1705 1706 /* create our matrix */ 1707 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1708 B = *A; 1709 a = (Mat_SeqSBAIJ*)B->data; 1710 1711 /* set matrix "i" values */ 1712 a->i[0] = 0; 1713 for (i=1; i<= mbs; i++) { 1714 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1715 a->ilen[i-1] = s_browlengths[i-1]; 1716 } 1717 a->s_nz = 0; 1718 for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i]; 1719 1720 /* read in nonzero values */ 1721 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1722 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1723 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1724 1725 /* set "a" and "j" values into matrix */ 1726 nzcount = 0; jcount = 0; 1727 for (i=0; i<mbs; i++) { 1728 nzcountb = nzcount; 1729 nmask = 0; 1730 for (j=0; j<bs; j++) { 1731 kmax = rowlengths[i*bs+j]; 1732 for (k=0; k<kmax; k++) { 1733 tmp = jj[nzcount++]/bs; /* block col. index */ 1734 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1735 } 1736 } 1737 /* sort the masked values */ 1738 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1739 1740 /* set "j" values into matrix */ 1741 maskcount = 1; 1742 for (j=0; j<nmask; j++) { 1743 a->j[jcount++] = masked[j]; 1744 mask[masked[j]] = maskcount++; 1745 } 1746 1747 /* set "a" values into matrix */ 1748 ishift = bs2*a->i[i]; 1749 for (j=0; j<bs; j++) { 1750 kmax = rowlengths[i*bs+j]; 1751 for (k=0; k<kmax; k++) { 1752 tmp = jj[nzcountb]/bs ; /* block col. index */ 1753 if (tmp >= i){ 1754 block = mask[tmp] - 1; 1755 point = jj[nzcountb] - bs*tmp; 1756 idx = ishift + bs2*block + j + bs*point; 1757 a->a[idx] = aa[nzcountb]; 1758 } 1759 nzcountb++; 1760 } 1761 } 1762 /* zero out the mask elements we set */ 1763 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1764 } 1765 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1766 1767 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1768 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1769 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1770 ierr = PetscFree(aa);CHKERRQ(ierr); 1771 ierr = PetscFree(jj);CHKERRQ(ierr); 1772 ierr = PetscFree(mask);CHKERRQ(ierr); 1773 1774 B->assembled = PETSC_TRUE; 1775 ierr = MatView_Private(B);CHKERRQ(ierr); 1776 PetscFunctionReturn(0); 1777 } 1778 EXTERN_C_END 1779 1780 1781 1782