1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,m = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((m+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 45 for (i=0; i<m; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 50 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 51 52 #undef __FUNCT__ 53 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 54 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 55 { 56 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 57 58 PetscFunctionBegin; 59 if (ia) { 60 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 61 } 62 *nn = a->mbs; 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 68 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 69 { 70 PetscFunctionBegin; 71 if (!ia) PetscFunctionReturn(0); 72 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 73 /* PetscFunctionReturn(0); */ 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 78 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 79 { 80 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 81 82 PetscFunctionBegin; 83 *bs = sbaij->bs; 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 89 int MatDestroy_SeqSBAIJ(Mat A) 90 { 91 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 92 int ierr; 93 94 PetscFunctionBegin; 95 #if defined(PETSC_USE_LOG) 96 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 97 #endif 98 ierr = PetscFree(a->a);CHKERRQ(ierr); 99 if (!a->singlemalloc) { 100 ierr = PetscFree(a->i);CHKERRQ(ierr); 101 ierr = PetscFree(a->j);CHKERRQ(ierr); 102 } 103 if (a->row) { 104 ierr = ISDestroy(a->row);CHKERRQ(ierr); 105 } 106 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 107 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 108 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 109 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 110 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 111 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 112 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 113 114 if (a->inew){ 115 ierr = PetscFree(a->inew);CHKERRQ(ierr); 116 a->inew = 0; 117 } 118 ierr = PetscFree(a);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 124 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 125 { 126 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 127 128 PetscFunctionBegin; 129 switch (op) { 130 case MAT_ROW_ORIENTED: 131 a->roworiented = PETSC_TRUE; 132 break; 133 case MAT_COLUMN_ORIENTED: 134 a->roworiented = PETSC_FALSE; 135 break; 136 case MAT_COLUMNS_SORTED: 137 a->sorted = PETSC_TRUE; 138 break; 139 case MAT_COLUMNS_UNSORTED: 140 a->sorted = PETSC_FALSE; 141 break; 142 case MAT_KEEP_ZEROED_ROWS: 143 a->keepzeroedrows = PETSC_TRUE; 144 break; 145 case MAT_NO_NEW_NONZERO_LOCATIONS: 146 a->nonew = 1; 147 break; 148 case MAT_NEW_NONZERO_LOCATION_ERR: 149 a->nonew = -1; 150 break; 151 case MAT_NEW_NONZERO_ALLOCATION_ERR: 152 a->nonew = -2; 153 break; 154 case MAT_YES_NEW_NONZERO_LOCATIONS: 155 a->nonew = 0; 156 break; 157 case MAT_ROWS_SORTED: 158 case MAT_ROWS_UNSORTED: 159 case MAT_YES_NEW_DIAGONALS: 160 case MAT_IGNORE_OFF_PROC_ENTRIES: 161 case MAT_USE_HASH_TABLE: 162 case MAT_USE_SINGLE_PRECISION_SOLVES: 163 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 164 break; 165 case MAT_NO_NEW_DIAGONALS: 166 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 167 default: 168 SETERRQ(PETSC_ERR_SUP,"unknown option"); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 175 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 176 { 177 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 178 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 179 MatScalar *aa,*aa_i; 180 PetscScalar *v_i; 181 182 PetscFunctionBegin; 183 bs = a->bs; 184 ai = a->i; 185 aj = a->j; 186 aa = a->a; 187 bs2 = a->bs2; 188 189 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 190 191 bn = row/bs; /* Block number */ 192 bp = row % bs; /* Block position */ 193 M = ai[bn+1] - ai[bn]; 194 *ncols = bs*M; 195 196 if (v) { 197 *v = 0; 198 if (*ncols) { 199 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 200 for (i=0; i<M; i++) { /* for each block in the block row */ 201 v_i = *v + i*bs; 202 aa_i = aa + bs2*(ai[bn] + i); 203 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 204 } 205 } 206 } 207 208 if (cols) { 209 *cols = 0; 210 if (*ncols) { 211 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 212 for (i=0; i<M; i++) { /* for each block in the block row */ 213 cols_i = *cols + i*bs; 214 itmp = bs*aj[ai[bn] + i]; 215 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 216 } 217 } 218 } 219 220 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 221 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 222 #ifdef column_search 223 v_i = *v + M*bs; 224 cols_i = *cols + M*bs; 225 for (i=0; i<bn; i++){ /* for each block row */ 226 M = ai[i+1] - ai[i]; 227 for (j=0; j<M; j++){ 228 itmp = aj[ai[i] + j]; /* block column value */ 229 if (itmp == bn){ 230 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 231 for (k=0; k<bs; k++) { 232 *cols_i++ = i*bs+k; 233 *v_i++ = aa_i[k]; 234 } 235 *ncols += bs; 236 break; 237 } 238 } 239 } 240 #endif 241 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 247 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 248 { 249 int ierr; 250 251 PetscFunctionBegin; 252 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 253 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 254 PetscFunctionReturn(0); 255 } 256 257 #undef __FUNCT__ 258 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 259 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 260 { 261 PetscFunctionBegin; 262 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 263 /* PetscFunctionReturn(0); */ 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 268 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 269 { 270 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 271 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 272 PetscScalar *aa; 273 FILE *file; 274 275 PetscFunctionBegin; 276 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 277 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 278 col_lens[0] = MAT_FILE_COOKIE; 279 280 col_lens[1] = A->m; 281 col_lens[2] = A->m; 282 col_lens[3] = a->s_nz*bs2; 283 284 /* store lengths of each row and write (including header) to file */ 285 count = 0; 286 for (i=0; i<a->mbs; i++) { 287 for (j=0; j<bs; j++) { 288 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 289 } 290 } 291 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 292 ierr = PetscFree(col_lens);CHKERRQ(ierr); 293 294 /* store column indices (zero start index) */ 295 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 296 count = 0; 297 for (i=0; i<a->mbs; i++) { 298 for (j=0; j<bs; j++) { 299 for (k=a->i[i]; k<a->i[i+1]; k++) { 300 for (l=0; l<bs; l++) { 301 jj[count++] = bs*a->j[k] + l; 302 } 303 } 304 } 305 } 306 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 307 ierr = PetscFree(jj);CHKERRQ(ierr); 308 309 /* store nonzero values */ 310 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 311 count = 0; 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 for (k=a->i[i]; k<a->i[i+1]; k++) { 315 for (l=0; l<bs; l++) { 316 aa[count++] = a->a[bs2*k + l*bs + j]; 317 } 318 } 319 } 320 } 321 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 322 ierr = PetscFree(aa);CHKERRQ(ierr); 323 324 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 325 if (file) { 326 fprintf(file,"-matload_block_size %d\n",a->bs); 327 } 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 333 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 334 { 335 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 336 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 337 char *name; 338 PetscViewerFormat format; 339 340 PetscFunctionBegin; 341 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 342 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 343 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 344 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 345 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 346 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 347 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 348 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 349 for (i=0; i<a->mbs; i++) { 350 for (j=0; j<bs; j++) { 351 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 352 for (k=a->i[i]; k<a->i[i+1]; k++) { 353 for (l=0; l<bs; l++) { 354 #if defined(PETSC_USE_COMPLEX) 355 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 356 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 357 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 358 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 359 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 360 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 361 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 362 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 363 } 364 #else 365 if (a->a[bs2*k + l*bs + j] != 0.0) { 366 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 367 } 368 #endif 369 } 370 } 371 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 372 } 373 } 374 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 375 } else { 376 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 377 for (i=0; i<a->mbs; i++) { 378 for (j=0; j<bs; j++) { 379 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 380 for (k=a->i[i]; k<a->i[i+1]; k++) { 381 for (l=0; l<bs; l++) { 382 #if defined(PETSC_USE_COMPLEX) 383 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 384 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 385 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 386 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 387 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 388 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } else { 390 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 391 } 392 #else 393 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 394 #endif 395 } 396 } 397 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 398 } 399 } 400 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 401 } 402 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 408 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 409 { 410 Mat A = (Mat) Aa; 411 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 412 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 413 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 414 MatScalar *aa; 415 MPI_Comm comm; 416 PetscViewer viewer; 417 418 PetscFunctionBegin; 419 /* 420 This is nasty. If this is called from an originally parallel matrix 421 then all processes call this,but only the first has the matrix so the 422 rest should return immediately. 423 */ 424 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 425 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 426 if (rank) PetscFunctionReturn(0); 427 428 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 429 430 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 431 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 432 433 /* loop over matrix elements drawing boxes */ 434 color = PETSC_DRAW_BLUE; 435 for (i=0,row=0; i<mbs; i++,row+=bs) { 436 for (j=a->i[i]; j<a->i[i+1]; j++) { 437 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 438 x_l = a->j[j]*bs; x_r = x_l + 1.0; 439 aa = a->a + j*bs2; 440 for (k=0; k<bs; k++) { 441 for (l=0; l<bs; l++) { 442 if (PetscRealPart(*aa++) >= 0.) continue; 443 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 444 } 445 } 446 } 447 } 448 color = PETSC_DRAW_CYAN; 449 for (i=0,row=0; i<mbs; i++,row+=bs) { 450 for (j=a->i[i]; j<a->i[i+1]; j++) { 451 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 452 x_l = a->j[j]*bs; x_r = x_l + 1.0; 453 aa = a->a + j*bs2; 454 for (k=0; k<bs; k++) { 455 for (l=0; l<bs; l++) { 456 if (PetscRealPart(*aa++) != 0.) continue; 457 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 458 } 459 } 460 } 461 } 462 463 color = PETSC_DRAW_RED; 464 for (i=0,row=0; i<mbs; i++,row+=bs) { 465 for (j=a->i[i]; j<a->i[i+1]; j++) { 466 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 467 x_l = a->j[j]*bs; x_r = x_l + 1.0; 468 aa = a->a + j*bs2; 469 for (k=0; k<bs; k++) { 470 for (l=0; l<bs; l++) { 471 if (PetscRealPart(*aa++) <= 0.) continue; 472 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 473 } 474 } 475 } 476 } 477 PetscFunctionReturn(0); 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 482 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 483 { 484 int ierr; 485 PetscReal xl,yl,xr,yr,w,h; 486 PetscDraw draw; 487 PetscTruth isnull; 488 489 PetscFunctionBegin; 490 491 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 492 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 493 494 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 495 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 496 xr += w; yr += h; xl = -w; yl = -h; 497 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 498 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 499 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "MatView_SeqSBAIJ" 505 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 506 { 507 int ierr; 508 PetscTruth issocket,isascii,isbinary,isdraw; 509 510 PetscFunctionBegin; 511 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 512 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 513 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 514 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 515 if (issocket) { 516 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 517 } else if (isascii){ 518 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 519 } else if (isbinary) { 520 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 521 } else if (isdraw) { 522 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 523 } else { 524 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 525 } 526 PetscFunctionReturn(0); 527 } 528 529 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 532 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 533 { 534 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 535 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 536 int *ai = a->i,*ailen = a->ilen; 537 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 538 MatScalar *ap,*aa = a->a,zero = 0.0; 539 540 PetscFunctionBegin; 541 for (k=0; k<m; k++) { /* loop over rows */ 542 row = im[k]; brow = row/bs; 543 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 544 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 545 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 546 nrow = ailen[brow]; 547 for (l=0; l<n; l++) { /* loop over columns */ 548 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 549 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 550 col = in[l] ; 551 bcol = col/bs; 552 cidx = col%bs; 553 ridx = row%bs; 554 high = nrow; 555 low = 0; /* assume unsorted */ 556 while (high-low > 5) { 557 t = (low+high)/2; 558 if (rp[t] > bcol) high = t; 559 else low = t; 560 } 561 for (i=low; i<high; i++) { 562 if (rp[i] > bcol) break; 563 if (rp[i] == bcol) { 564 *v++ = ap[bs2*i+bs*cidx+ridx]; 565 goto finished; 566 } 567 } 568 *v++ = zero; 569 finished:; 570 } 571 } 572 PetscFunctionReturn(0); 573 } 574 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 578 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 579 { 580 PetscFunctionBegin; 581 SETERRQ(1,"Function not yet written for SBAIJ format"); 582 /* PetscFunctionReturn(0); */ 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 587 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 588 { 589 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 590 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 591 int m = A->m,*ip,N,*ailen = a->ilen; 592 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 593 MatScalar *aa = a->a,*ap; 594 595 PetscFunctionBegin; 596 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 597 598 if (m) rmax = ailen[0]; 599 for (i=1; i<mbs; i++) { 600 /* move each row back by the amount of empty slots (fshift) before it*/ 601 fshift += imax[i-1] - ailen[i-1]; 602 rmax = PetscMax(rmax,ailen[i]); 603 if (fshift) { 604 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 605 N = ailen[i]; 606 for (j=0; j<N; j++) { 607 ip[j-fshift] = ip[j]; 608 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 609 } 610 } 611 ai[i] = ai[i-1] + ailen[i-1]; 612 } 613 if (mbs) { 614 fshift += imax[mbs-1] - ailen[mbs-1]; 615 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 616 } 617 /* reset ilen and imax for each row */ 618 for (i=0; i<mbs; i++) { 619 ailen[i] = imax[i] = ai[i+1] - ai[i]; 620 } 621 a->s_nz = ai[mbs]; 622 623 /* diagonals may have moved, so kill the diagonal pointers */ 624 if (fshift && a->diag) { 625 ierr = PetscFree(a->diag);CHKERRQ(ierr); 626 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 627 a->diag = 0; 628 } 629 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 630 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 631 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 632 a->reallocs); 633 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 634 a->reallocs = 0; 635 A->info.nz_unneeded = (PetscReal)fshift*bs2; 636 637 PetscFunctionReturn(0); 638 } 639 640 /* 641 This function returns an array of flags which indicate the locations of contiguous 642 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 643 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 644 Assume: sizes should be long enough to hold all the values. 645 */ 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 648 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 649 { 650 int i,j,k,row; 651 PetscTruth flg; 652 653 PetscFunctionBegin; 654 for (i=0,j=0; i<n; j++) { 655 row = idx[i]; 656 if (row%bs!=0) { /* Not the begining of a block */ 657 sizes[j] = 1; 658 i++; 659 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 660 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 661 i++; 662 } else { /* Begining of the block, so check if the complete block exists */ 663 flg = PETSC_TRUE; 664 for (k=1; k<bs; k++) { 665 if (row+k != idx[i+k]) { /* break in the block */ 666 flg = PETSC_FALSE; 667 break; 668 } 669 } 670 if (flg == PETSC_TRUE) { /* No break in the bs */ 671 sizes[j] = bs; 672 i+= bs; 673 } else { 674 sizes[j] = 1; 675 i++; 676 } 677 } 678 } 679 *bs_max = j; 680 PetscFunctionReturn(0); 681 } 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 685 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 686 { 687 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 688 int ierr,i,j,k,count,is_n,*is_idx,*rows; 689 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 690 PetscScalar zero = 0.0; 691 MatScalar *aa; 692 693 PetscFunctionBegin; 694 /* Make a copy of the IS and sort it */ 695 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 696 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 697 698 /* allocate memory for rows,sizes */ 699 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 700 sizes = rows + is_n; 701 702 /* initialize copy IS values to rows, and sort them */ 703 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 704 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 705 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 706 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 707 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 708 } else { 709 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 710 } 711 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 712 713 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 714 row = rows[j]; /* row to be zeroed */ 715 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 716 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 717 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 718 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 719 if (diag) { 720 if (sbaij->ilen[row/bs] > 0) { 721 sbaij->ilen[row/bs] = 1; 722 sbaij->j[sbaij->i[row/bs]] = row/bs; 723 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 724 } 725 /* Now insert all the diagoanl values for this bs */ 726 for (k=0; k<bs; k++) { 727 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 728 } 729 } else { /* (!diag) */ 730 sbaij->ilen[row/bs] = 0; 731 } /* end (!diag) */ 732 } else { /* (sizes[i] != bs), broken block */ 733 #if defined (PETSC_USE_DEBUG) 734 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 735 #endif 736 for (k=0; k<count; k++) { 737 aa[0] = zero; 738 aa+=bs; 739 } 740 if (diag) { 741 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 742 } 743 } 744 } 745 746 ierr = PetscFree(rows);CHKERRQ(ierr); 747 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 748 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 /* Only add/insert a(i,j) with i<=j (blocks). 753 Any a(i,j) with i>j input by user is ingored. 754 */ 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 758 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 759 { 760 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 761 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 762 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 763 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 764 int ridx,cidx,bs2=a->bs2,ierr; 765 MatScalar *ap,value,*aa=a->a,*bap; 766 767 PetscFunctionBegin; 768 769 for (k=0; k<m; k++) { /* loop over added rows */ 770 row = im[k]; /* row number */ 771 brow = row/bs; /* block row number */ 772 if (row < 0) continue; 773 #if defined(PETSC_USE_BOPT_g) 774 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 775 #endif 776 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 777 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 778 rmax = imax[brow]; /* maximum space allocated for this row */ 779 nrow = ailen[brow]; /* actual length of this row */ 780 low = 0; 781 782 for (l=0; l<n; l++) { /* loop over added columns */ 783 if (in[l] < 0) continue; 784 #if defined(PETSC_USE_BOPT_g) 785 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 786 #endif 787 col = in[l]; 788 bcol = col/bs; /* block col number */ 789 790 if (brow <= bcol){ 791 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 792 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 793 /* element value a(k,l) */ 794 if (roworiented) { 795 value = v[l + k*n]; 796 } else { 797 value = v[k + l*m]; 798 } 799 800 /* move pointer bap to a(k,l) quickly and add/insert value */ 801 if (!sorted) low = 0; high = nrow; 802 while (high-low > 7) { 803 t = (low+high)/2; 804 if (rp[t] > bcol) high = t; 805 else low = t; 806 } 807 for (i=low; i<high; i++) { 808 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 809 if (rp[i] > bcol) break; 810 if (rp[i] == bcol) { 811 bap = ap + bs2*i + bs*cidx + ridx; 812 if (is == ADD_VALUES) *bap += value; 813 else *bap = value; 814 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 815 if (brow == bcol && ridx < cidx){ 816 bap = ap + bs2*i + bs*ridx + cidx; 817 if (is == ADD_VALUES) *bap += value; 818 else *bap = value; 819 } 820 goto noinsert1; 821 } 822 } 823 824 if (nonew == 1) goto noinsert1; 825 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 826 if (nrow >= rmax) { 827 /* there is no extra room in row, therefore enlarge */ 828 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 829 MatScalar *new_a; 830 831 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 832 833 /* Malloc new storage space */ 834 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 835 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 836 new_j = (int*)(new_a + bs2*new_nz); 837 new_i = new_j + new_nz; 838 839 /* copy over old data into new slots */ 840 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 841 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 842 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 843 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 844 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 845 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 846 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 847 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 848 /* free up old matrix storage */ 849 ierr = PetscFree(a->a);CHKERRQ(ierr); 850 if (!a->singlemalloc) { 851 ierr = PetscFree(a->i);CHKERRQ(ierr); 852 ierr = PetscFree(a->j);CHKERRQ(ierr); 853 } 854 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 855 a->singlemalloc = PETSC_TRUE; 856 857 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 858 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 859 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 860 a->s_maxnz += bs2*CHUNKSIZE; 861 a->reallocs++; 862 a->s_nz++; 863 } 864 865 N = nrow++ - 1; 866 /* shift up all the later entries in this row */ 867 for (ii=N; ii>=i; ii--) { 868 rp[ii+1] = rp[ii]; 869 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 870 } 871 if (N>=i) { 872 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 873 } 874 rp[i] = bcol; 875 ap[bs2*i + bs*cidx + ridx] = value; 876 noinsert1:; 877 low = i; 878 /* } */ 879 } 880 } /* end of if .. if.. */ 881 } /* end of loop over added columns */ 882 ailen[brow] = nrow; 883 } /* end of loop over added rows */ 884 885 PetscFunctionReturn(0); 886 } 887 888 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 889 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 890 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 891 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 892 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 893 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 894 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 895 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 896 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 897 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 898 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 899 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 900 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 901 extern int MatZeroEntries_SeqSBAIJ(Mat); 902 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 903 904 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 905 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 906 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 907 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 908 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 909 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 910 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 911 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 912 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 913 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 914 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 915 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 916 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 917 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 918 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 919 920 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 921 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 922 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 923 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 924 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 925 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 926 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 927 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 928 929 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 930 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 931 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 932 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 933 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 934 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 935 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 936 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 937 938 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 939 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 940 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 941 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 942 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 943 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 944 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 945 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 946 947 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 948 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 949 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 950 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 951 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 952 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 953 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 954 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 955 956 #ifdef HAVE_MatICCFactor 957 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 960 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 961 { 962 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 963 Mat outA; 964 int ierr; 965 PetscTruth row_identity,col_identity; 966 967 PetscFunctionBegin; 968 /* 969 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 970 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 971 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 972 if (!row_identity || !col_identity) { 973 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 974 } 975 */ 976 977 outA = inA; 978 inA->factor = FACTOR_CHOLESKY; 979 980 if (!a->diag) { 981 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 982 } 983 /* 984 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 985 for ILU(0) factorization with natural ordering 986 */ 987 switch (a->bs) { 988 case 1: 989 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 990 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 991 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 992 case 2: 993 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 994 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 995 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 996 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 997 break; 998 case 3: 999 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1000 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1001 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1002 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1003 break; 1004 case 4: 1005 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1006 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1007 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1008 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1009 break; 1010 case 5: 1011 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1012 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1013 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1014 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1015 break; 1016 case 6: 1017 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1018 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1019 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1020 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1021 break; 1022 case 7: 1023 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1025 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1026 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1027 break; 1028 default: 1029 a->row = row; 1030 a->icol = col; 1031 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1032 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1033 1034 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1035 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1036 PetscLogObjectParent(inA,a->icol); 1037 1038 if (!a->solve_work) { 1039 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1040 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1041 } 1042 } 1043 1044 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1045 1046 PetscFunctionReturn(0); 1047 } 1048 #endif 1049 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1052 int MatPrintHelp_SeqSBAIJ(Mat A) 1053 { 1054 static PetscTruth called = PETSC_FALSE; 1055 MPI_Comm comm = A->comm; 1056 int ierr; 1057 1058 PetscFunctionBegin; 1059 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1060 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1061 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 EXTERN_C_BEGIN 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1068 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1069 { 1070 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1071 int i,nz,n; 1072 1073 PetscFunctionBegin; 1074 nz = baij->s_maxnz; 1075 n = mat->n; 1076 for (i=0; i<nz; i++) { 1077 baij->j[i] = indices[i]; 1078 } 1079 baij->s_nz = nz; 1080 for (i=0; i<n; i++) { 1081 baij->ilen[i] = baij->imax[i]; 1082 } 1083 1084 PetscFunctionReturn(0); 1085 } 1086 EXTERN_C_END 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1090 /*@ 1091 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1092 in the matrix. 1093 1094 Input Parameters: 1095 + mat - the SeqSBAIJ matrix 1096 - indices - the column indices 1097 1098 Level: advanced 1099 1100 Notes: 1101 This can be called if you have precomputed the nonzero structure of the 1102 matrix and want to provide it to the matrix object to improve the performance 1103 of the MatSetValues() operation. 1104 1105 You MUST have set the correct numbers of nonzeros per row in the call to 1106 MatCreateSeqSBAIJ(). 1107 1108 MUST be called before any calls to MatSetValues() 1109 1110 .seealso: MatCreateSeqSBAIJ 1111 @*/ 1112 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1113 { 1114 int ierr,(*f)(Mat,int *); 1115 1116 PetscFunctionBegin; 1117 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1118 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1119 if (f) { 1120 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1121 } else { 1122 SETERRQ(1,"Wrong type of matrix to set column indices"); 1123 } 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1129 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1130 { 1131 int ierr; 1132 1133 PetscFunctionBegin; 1134 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 /* -------------------------------------------------------------------*/ 1139 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1140 MatGetRow_SeqSBAIJ, 1141 MatRestoreRow_SeqSBAIJ, 1142 MatMult_SeqSBAIJ_N, 1143 MatMultAdd_SeqSBAIJ_N, 1144 MatMultTranspose_SeqSBAIJ, 1145 MatMultTransposeAdd_SeqSBAIJ, 1146 MatSolve_SeqSBAIJ_N, 1147 0, 1148 0, 1149 0, 1150 0, 1151 MatCholeskyFactor_SeqSBAIJ, 1152 MatRelax_SeqSBAIJ, 1153 MatTranspose_SeqSBAIJ, 1154 MatGetInfo_SeqSBAIJ, 1155 MatEqual_SeqSBAIJ, 1156 MatGetDiagonal_SeqSBAIJ, 1157 MatDiagonalScale_SeqSBAIJ, 1158 MatNorm_SeqSBAIJ, 1159 0, 1160 MatAssemblyEnd_SeqSBAIJ, 1161 0, 1162 MatSetOption_SeqSBAIJ, 1163 MatZeroEntries_SeqSBAIJ, 1164 MatZeroRows_SeqSBAIJ, 1165 0, 1166 0, 1167 MatCholeskyFactorSymbolic_SeqSBAIJ, 1168 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1169 MatSetUpPreallocation_SeqSBAIJ, 1170 0, 1171 MatICCFactorSymbolic_SeqSBAIJ, 1172 0, 1173 0, 1174 MatDuplicate_SeqSBAIJ, 1175 0, 1176 0, 1177 0, 1178 0, 1179 0, 1180 MatGetSubMatrices_SeqSBAIJ, 1181 MatIncreaseOverlap_SeqSBAIJ, 1182 MatGetValues_SeqSBAIJ, 1183 0, 1184 MatPrintHelp_SeqSBAIJ, 1185 MatScale_SeqSBAIJ, 1186 0, 1187 0, 1188 0, 1189 MatGetBlockSize_SeqSBAIJ, 1190 MatGetRowIJ_SeqSBAIJ, 1191 MatRestoreRowIJ_SeqSBAIJ, 1192 0, 1193 0, 1194 0, 1195 0, 1196 0, 1197 0, 1198 MatSetValuesBlocked_SeqSBAIJ, 1199 MatGetSubMatrix_SeqSBAIJ, 1200 0, 1201 0, 1202 MatGetPetscMaps_Petsc, 1203 0, 1204 0, 1205 0, 1206 0, 1207 0, 1208 0, 1209 MatGetRowMax_SeqSBAIJ}; 1210 1211 EXTERN_C_BEGIN 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1214 int MatStoreValues_SeqSBAIJ(Mat mat) 1215 { 1216 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1217 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1218 int ierr; 1219 1220 PetscFunctionBegin; 1221 if (aij->nonew != 1) { 1222 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1223 } 1224 1225 /* allocate space for values if not already there */ 1226 if (!aij->saved_values) { 1227 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1228 } 1229 1230 /* copy values over */ 1231 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 EXTERN_C_END 1235 1236 EXTERN_C_BEGIN 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1239 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1240 { 1241 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1242 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1243 1244 PetscFunctionBegin; 1245 if (aij->nonew != 1) { 1246 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1247 } 1248 if (!aij->saved_values) { 1249 SETERRQ(1,"Must call MatStoreValues(A);first"); 1250 } 1251 1252 /* copy values over */ 1253 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 EXTERN_C_END 1257 1258 EXTERN_C_BEGIN 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1261 int MatCreate_SeqSBAIJ(Mat B) 1262 { 1263 Mat_SeqSBAIJ *b; 1264 int ierr,size; 1265 1266 PetscFunctionBegin; 1267 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1268 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1269 B->m = B->M = PetscMax(B->m,B->M); 1270 B->n = B->N = PetscMax(B->n,B->N); 1271 1272 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1273 B->data = (void*)b; 1274 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1275 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1276 B->ops->destroy = MatDestroy_SeqSBAIJ; 1277 B->ops->view = MatView_SeqSBAIJ; 1278 B->factor = 0; 1279 B->lupivotthreshold = 1.0; 1280 B->mapping = 0; 1281 b->row = 0; 1282 b->icol = 0; 1283 b->reallocs = 0; 1284 b->saved_values = 0; 1285 1286 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1287 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1288 1289 b->sorted = PETSC_FALSE; 1290 b->roworiented = PETSC_TRUE; 1291 b->nonew = 0; 1292 b->diag = 0; 1293 b->solve_work = 0; 1294 b->mult_work = 0; 1295 b->spptr = 0; 1296 b->keepzeroedrows = PETSC_FALSE; 1297 1298 b->inew = 0; 1299 b->jnew = 0; 1300 b->anew = 0; 1301 b->a2anew = 0; 1302 b->permute = PETSC_FALSE; 1303 1304 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1305 "MatStoreValues_SeqSBAIJ", 1306 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1308 "MatRetrieveValues_SeqSBAIJ", 1309 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1311 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1312 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 EXTERN_C_END 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1319 /*@C 1320 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1321 compressed row) format. For good matrix assembly performance the 1322 user should preallocate the matrix storage by setting the parameter nz 1323 (or the array nnz). By setting these parameters accurately, performance 1324 during matrix assembly can be increased by more than a factor of 50. 1325 1326 Collective on Mat 1327 1328 Input Parameters: 1329 + A - the symmetric matrix 1330 . bs - size of block 1331 . nz - number of block nonzeros per block row (same for all rows) 1332 - nnz - array containing the number of block nonzeros in the various block rows 1333 (possibly different for each block row) or PETSC_NULL 1334 1335 Options Database Keys: 1336 . -mat_no_unroll - uses code that does not unroll the loops in the 1337 block calculations (much slower) 1338 . -mat_block_size - size of the blocks to use 1339 1340 Level: intermediate 1341 1342 Notes: 1343 The block AIJ format is compatible with standard Fortran 77 1344 storage. That is, the stored row and column indices can begin at 1345 either one (as in Fortran) or zero. See the users' manual for details. 1346 1347 Specify the preallocated storage with either nz or nnz (not both). 1348 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1349 allocation. For additional details, see the users manual chapter on 1350 matrices. 1351 1352 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1353 @*/ 1354 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1355 { 1356 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1357 int i,len,ierr,mbs,bs2; 1358 PetscTruth flg; 1359 int s_nz; 1360 1361 PetscFunctionBegin; 1362 B->preallocated = PETSC_TRUE; 1363 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1364 mbs = B->m/bs; 1365 bs2 = bs*bs; 1366 1367 if (mbs*bs != B->m) { 1368 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1369 } 1370 1371 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1372 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1373 if (nnz) { 1374 for (i=0; i<mbs; i++) { 1375 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1376 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1377 } 1378 } 1379 1380 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1381 if (!flg) { 1382 switch (bs) { 1383 case 1: 1384 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1385 B->ops->solve = MatSolve_SeqSBAIJ_1; 1386 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1387 B->ops->mult = MatMult_SeqSBAIJ_1; 1388 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1389 break; 1390 case 2: 1391 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1392 B->ops->solve = MatSolve_SeqSBAIJ_2; 1393 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1394 B->ops->mult = MatMult_SeqSBAIJ_2; 1395 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1396 break; 1397 case 3: 1398 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1399 B->ops->solve = MatSolve_SeqSBAIJ_3; 1400 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1401 B->ops->mult = MatMult_SeqSBAIJ_3; 1402 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1403 break; 1404 case 4: 1405 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1406 B->ops->solve = MatSolve_SeqSBAIJ_4; 1407 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1408 B->ops->mult = MatMult_SeqSBAIJ_4; 1409 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1410 break; 1411 case 5: 1412 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1413 B->ops->solve = MatSolve_SeqSBAIJ_5; 1414 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1415 B->ops->mult = MatMult_SeqSBAIJ_5; 1416 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1417 break; 1418 case 6: 1419 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1420 B->ops->solve = MatSolve_SeqSBAIJ_6; 1421 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1422 B->ops->mult = MatMult_SeqSBAIJ_6; 1423 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1424 break; 1425 case 7: 1426 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1427 B->ops->solve = MatSolve_SeqSBAIJ_7; 1428 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1429 B->ops->mult = MatMult_SeqSBAIJ_7; 1430 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1431 break; 1432 } 1433 } 1434 1435 b->mbs = mbs; 1436 b->nbs = mbs; 1437 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1438 if (!nnz) { 1439 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1440 else if (nz <= 0) nz = 1; 1441 for (i=0; i<mbs; i++) { 1442 b->imax[i] = nz; 1443 } 1444 nz = nz*mbs; /* total nz */ 1445 } else { 1446 nz = 0; 1447 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1448 } 1449 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1450 s_nz = nz; 1451 1452 /* allocate the matrix space */ 1453 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1454 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1455 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1456 b->j = (int*)(b->a + s_nz*bs2); 1457 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1458 b->i = b->j + s_nz; 1459 b->singlemalloc = PETSC_TRUE; 1460 1461 /* pointer to beginning of each row */ 1462 b->i[0] = 0; 1463 for (i=1; i<mbs+1; i++) { 1464 b->i[i] = b->i[i-1] + b->imax[i-1]; 1465 } 1466 1467 /* b->ilen will count nonzeros in each block row so far. */ 1468 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1469 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1470 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1471 1472 b->bs = bs; 1473 b->bs2 = bs2; 1474 b->s_nz = 0; 1475 b->s_maxnz = s_nz*bs2; 1476 1477 b->inew = 0; 1478 b->jnew = 0; 1479 b->anew = 0; 1480 b->a2anew = 0; 1481 b->permute = PETSC_FALSE; 1482 PetscFunctionReturn(0); 1483 } 1484 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatCreateSeqSBAIJ" 1488 /*@C 1489 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1490 compressed row) format. For good matrix assembly performance the 1491 user should preallocate the matrix storage by setting the parameter nz 1492 (or the array nnz). By setting these parameters accurately, performance 1493 during matrix assembly can be increased by more than a factor of 50. 1494 1495 Collective on MPI_Comm 1496 1497 Input Parameters: 1498 + comm - MPI communicator, set to PETSC_COMM_SELF 1499 . bs - size of block 1500 . m - number of rows, or number of columns 1501 . nz - number of block nonzeros per block row (same for all rows) 1502 - nnz - array containing the number of block nonzeros in the various block rows 1503 (possibly different for each block row) or PETSC_NULL 1504 1505 Output Parameter: 1506 . A - the symmetric matrix 1507 1508 Options Database Keys: 1509 . -mat_no_unroll - uses code that does not unroll the loops in the 1510 block calculations (much slower) 1511 . -mat_block_size - size of the blocks to use 1512 1513 Level: intermediate 1514 1515 Notes: 1516 The block AIJ format is compatible with standard Fortran 77 1517 storage. That is, the stored row and column indices can begin at 1518 either one (as in Fortran) or zero. See the users' manual for details. 1519 1520 Specify the preallocated storage with either nz or nnz (not both). 1521 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1522 allocation. For additional details, see the users manual chapter on 1523 matrices. 1524 1525 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1526 @*/ 1527 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1528 { 1529 int ierr; 1530 1531 PetscFunctionBegin; 1532 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1533 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1534 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1540 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1541 { 1542 Mat C; 1543 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1544 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1545 1546 PetscFunctionBegin; 1547 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1548 1549 *B = 0; 1550 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1551 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1552 c = (Mat_SeqSBAIJ*)C->data; 1553 1554 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1555 C->preallocated = PETSC_TRUE; 1556 C->factor = A->factor; 1557 c->row = 0; 1558 c->icol = 0; 1559 c->saved_values = 0; 1560 c->keepzeroedrows = a->keepzeroedrows; 1561 C->assembled = PETSC_TRUE; 1562 1563 c->bs = a->bs; 1564 c->bs2 = a->bs2; 1565 c->mbs = a->mbs; 1566 c->nbs = a->nbs; 1567 1568 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1569 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1570 for (i=0; i<mbs; i++) { 1571 c->imax[i] = a->imax[i]; 1572 c->ilen[i] = a->ilen[i]; 1573 } 1574 1575 /* allocate the matrix space */ 1576 c->singlemalloc = PETSC_TRUE; 1577 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1578 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1579 c->j = (int*)(c->a + nz*bs2); 1580 c->i = c->j + nz; 1581 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1582 if (mbs > 0) { 1583 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1584 if (cpvalues == MAT_COPY_VALUES) { 1585 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1586 } else { 1587 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1588 } 1589 } 1590 1591 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1592 c->sorted = a->sorted; 1593 c->roworiented = a->roworiented; 1594 c->nonew = a->nonew; 1595 1596 if (a->diag) { 1597 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1598 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1599 for (i=0; i<mbs; i++) { 1600 c->diag[i] = a->diag[i]; 1601 } 1602 } else c->diag = 0; 1603 c->s_nz = a->s_nz; 1604 c->s_maxnz = a->s_maxnz; 1605 c->solve_work = 0; 1606 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1607 c->mult_work = 0; 1608 *B = C; 1609 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 EXTERN_C_BEGIN 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1616 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1617 { 1618 Mat_SeqSBAIJ *a; 1619 Mat B; 1620 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1621 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount; 1622 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1623 int *masked,nmask,tmp,bs2,ishift; 1624 PetscScalar *aa; 1625 MPI_Comm comm = ((PetscObject)viewer)->comm; 1626 1627 PetscFunctionBegin; 1628 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1629 bs2 = bs*bs; 1630 1631 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1632 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1633 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1634 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1635 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1636 M = header[1]; N = header[2]; nz = header[3]; 1637 1638 if (header[3] < 0) { 1639 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1640 } 1641 1642 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1643 1644 /* 1645 This code adds extra rows to make sure the number of rows is 1646 divisible by the blocksize 1647 */ 1648 mbs = M/bs; 1649 extra_rows = bs - M + bs*(mbs); 1650 if (extra_rows == bs) extra_rows = 0; 1651 else mbs++; 1652 if (extra_rows) { 1653 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1654 } 1655 1656 /* read in row lengths */ 1657 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1658 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1659 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1660 1661 /* read in column indices */ 1662 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1663 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1664 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1665 1666 /* loop over row lengths determining block row lengths */ 1667 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1668 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1669 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1670 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1671 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1672 masked = mask + mbs; 1673 rowcount = 0; nzcount = 0; 1674 for (i=0; i<mbs; i++) { 1675 nmask = 0; 1676 for (j=0; j<bs; j++) { 1677 kmax = rowlengths[rowcount]; 1678 for (k=0; k<kmax; k++) { 1679 tmp = jj[nzcount++]/bs; /* block col. index */ 1680 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1681 } 1682 rowcount++; 1683 } 1684 s_browlengths[i] += nmask; 1685 browlengths[i] = 2*s_browlengths[i]; 1686 1687 /* zero out the mask elements we set */ 1688 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1689 } 1690 1691 /* create our matrix */ 1692 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1693 B = *A; 1694 a = (Mat_SeqSBAIJ*)B->data; 1695 1696 /* set matrix "i" values */ 1697 a->i[0] = 0; 1698 for (i=1; i<= mbs; i++) { 1699 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1700 a->ilen[i-1] = s_browlengths[i-1]; 1701 } 1702 a->s_nz = 0; 1703 for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i]; 1704 1705 /* read in nonzero values */ 1706 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1707 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1708 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1709 1710 /* set "a" and "j" values into matrix */ 1711 nzcount = 0; jcount = 0; 1712 for (i=0; i<mbs; i++) { 1713 nzcountb = nzcount; 1714 nmask = 0; 1715 for (j=0; j<bs; j++) { 1716 kmax = rowlengths[i*bs+j]; 1717 for (k=0; k<kmax; k++) { 1718 tmp = jj[nzcount++]/bs; /* block col. index */ 1719 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1720 } 1721 } 1722 /* sort the masked values */ 1723 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1724 1725 /* set "j" values into matrix */ 1726 maskcount = 1; 1727 for (j=0; j<nmask; j++) { 1728 a->j[jcount++] = masked[j]; 1729 mask[masked[j]] = maskcount++; 1730 } 1731 1732 /* set "a" values into matrix */ 1733 ishift = bs2*a->i[i]; 1734 for (j=0; j<bs; j++) { 1735 kmax = rowlengths[i*bs+j]; 1736 for (k=0; k<kmax; k++) { 1737 tmp = jj[nzcountb]/bs ; /* block col. index */ 1738 if (tmp >= i){ 1739 block = mask[tmp] - 1; 1740 point = jj[nzcountb] - bs*tmp; 1741 idx = ishift + bs2*block + j + bs*point; 1742 a->a[idx] = aa[nzcountb]; 1743 } 1744 nzcountb++; 1745 } 1746 } 1747 /* zero out the mask elements we set */ 1748 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1749 } 1750 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1751 1752 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1753 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1754 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1755 ierr = PetscFree(aa);CHKERRQ(ierr); 1756 ierr = PetscFree(jj);CHKERRQ(ierr); 1757 ierr = PetscFree(mask);CHKERRQ(ierr); 1758 1759 B->assembled = PETSC_TRUE; 1760 ierr = MatView_Private(B);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 EXTERN_C_END 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1767 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1768 { 1769 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1770 MatScalar *aa=a->a,*v,*v1; 1771 PetscScalar *x,*b,*t,sum,d; 1772 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1773 int nz,nz1,*vj,*vj1,i; 1774 1775 PetscFunctionBegin; 1776 its = its*lits; 1777 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1778 1779 if (bs > 1) 1780 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1781 1782 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1783 if (xx != bb) { 1784 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1785 } else { 1786 b = x; 1787 } 1788 1789 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1790 1791 if (flag & SOR_ZERO_INITIAL_GUESS) { 1792 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1793 for (i=0; i<m; i++) 1794 t[i] = b[i]; 1795 1796 for (i=0; i<m; i++){ 1797 d = *(aa + ai[i]); /* diag[i] */ 1798 v = aa + ai[i] + 1; 1799 vj = aj + ai[i] + 1; 1800 nz = ai[i+1] - ai[i] - 1; 1801 x[i] = omega*t[i]/d; 1802 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1803 PetscLogFlops(2*nz-1); 1804 } 1805 } 1806 1807 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1808 for (i=0; i<m; i++) 1809 t[i] = b[i]; 1810 1811 for (i=0; i<m-1; i++){ /* update rhs */ 1812 v = aa + ai[i] + 1; 1813 vj = aj + ai[i] + 1; 1814 nz = ai[i+1] - ai[i] - 1; 1815 while (nz--) t[*vj++] -= x[i]*(*v++); 1816 PetscLogFlops(2*nz-1); 1817 } 1818 for (i=m-1; i>=0; i--){ 1819 d = *(aa + ai[i]); 1820 v = aa + ai[i] + 1; 1821 vj = aj + ai[i] + 1; 1822 nz = ai[i+1] - ai[i] - 1; 1823 sum = t[i]; 1824 while (nz--) sum -= x[*vj++]*(*v++); 1825 PetscLogFlops(2*nz-1); 1826 x[i] = (1-omega)*x[i] + omega*sum/d; 1827 } 1828 } 1829 its--; 1830 } 1831 1832 while (its--) { 1833 /* 1834 forward sweep: 1835 for i=0,...,m-1: 1836 sum[i] = (b[i] - U(i,:)x )/d[i]; 1837 x[i] = (1-omega)x[i] + omega*sum[i]; 1838 b = b - x[i]*U^T(i,:); 1839 1840 */ 1841 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1842 for (i=0; i<m; i++) 1843 t[i] = b[i]; 1844 1845 for (i=0; i<m; i++){ 1846 d = *(aa + ai[i]); /* diag[i] */ 1847 v = aa + ai[i] + 1; v1=v; 1848 vj = aj + ai[i] + 1; vj1=vj; 1849 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1850 sum = t[i]; 1851 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1852 x[i] = (1-omega)*x[i] + omega*sum/d; 1853 while (nz--) t[*vj++] -= x[i]*(*v++); 1854 PetscLogFlops(4*nz-2); 1855 } 1856 } 1857 1858 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1859 /* 1860 backward sweep: 1861 b = b - x[i]*U^T(i,:), i=0,...,n-2 1862 for i=m-1,...,0: 1863 sum[i] = (b[i] - U(i,:)x )/d[i]; 1864 x[i] = (1-omega)x[i] + omega*sum[i]; 1865 */ 1866 for (i=0; i<m; i++) 1867 t[i] = b[i]; 1868 1869 for (i=0; i<m-1; i++){ /* update rhs */ 1870 v = aa + ai[i] + 1; 1871 vj = aj + ai[i] + 1; 1872 nz = ai[i+1] - ai[i] - 1; 1873 while (nz--) t[*vj++] -= x[i]*(*v++); 1874 PetscLogFlops(2*nz-1); 1875 } 1876 for (i=m-1; i>=0; i--){ 1877 d = *(aa + ai[i]); 1878 v = aa + ai[i] + 1; 1879 vj = aj + ai[i] + 1; 1880 nz = ai[i+1] - ai[i] - 1; 1881 sum = t[i]; 1882 while (nz--) sum -= x[*vj++]*(*v++); 1883 PetscLogFlops(2*nz-1); 1884 x[i] = (1-omega)*x[i] + omega*sum/d; 1885 } 1886 } 1887 } 1888 1889 ierr = PetscFree(t); CHKERRQ(ierr); 1890 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1891 if (bb != xx) { 1892 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1893 } 1894 1895 PetscFunctionReturn(0); 1896 } 1897 1898 1899 1900 1901 1902 1903