1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 case MAT_NOT_SYMMETRIC: 183 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 184 case MAT_HERMITIAN: 185 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 186 case MAT_SYMMETRIC: 187 case MAT_STRUCTURALLY_SYMMETRIC: 188 case MAT_NOT_HERMITIAN: 189 case MAT_SYMMETRY_ETERNAL: 190 case MAT_NOT_SYMMETRY_ETERNAL: 191 break; 192 default: 193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 204 MatScalar *aa,*aa_i; 205 PetscScalar *v_i; 206 207 PetscFunctionBegin; 208 bs = a->bs; 209 ai = a->i; 210 aj = a->j; 211 aa = a->a; 212 bs2 = a->bs2; 213 214 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 215 216 bn = row/bs; /* Block number */ 217 bp = row % bs; /* Block position */ 218 M = ai[bn+1] - ai[bn]; 219 *ncols = bs*M; 220 221 if (v) { 222 *v = 0; 223 if (*ncols) { 224 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 225 for (i=0; i<M; i++) { /* for each block in the block row */ 226 v_i = *v + i*bs; 227 aa_i = aa + bs2*(ai[bn] + i); 228 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 229 } 230 } 231 } 232 233 if (cols) { 234 *cols = 0; 235 if (*ncols) { 236 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 237 for (i=0; i<M; i++) { /* for each block in the block row */ 238 cols_i = *cols + i*bs; 239 itmp = bs*aj[ai[bn] + i]; 240 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 241 } 242 } 243 } 244 245 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 246 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 247 #ifdef column_search 248 v_i = *v + M*bs; 249 cols_i = *cols + M*bs; 250 for (i=0; i<bn; i++){ /* for each block row */ 251 M = ai[i+1] - ai[i]; 252 for (j=0; j<M; j++){ 253 itmp = aj[ai[i] + j]; /* block column value */ 254 if (itmp == bn){ 255 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 256 for (k=0; k<bs; k++) { 257 *cols_i++ = i*bs+k; 258 *v_i++ = aa_i[k]; 259 } 260 *ncols += bs; 261 break; 262 } 263 } 264 } 265 #endif 266 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 273 { 274 int ierr; 275 276 PetscFunctionBegin; 277 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 278 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 285 { 286 int ierr; 287 PetscFunctionBegin; 288 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 294 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 295 { 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 297 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 298 PetscScalar *aa; 299 FILE *file; 300 301 PetscFunctionBegin; 302 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 303 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 304 col_lens[0] = MAT_FILE_COOKIE; 305 306 col_lens[1] = A->m; 307 col_lens[2] = A->m; 308 col_lens[3] = a->s_nz*bs2; 309 310 /* store lengths of each row and write (including header) to file */ 311 count = 0; 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 315 } 316 } 317 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 318 ierr = PetscFree(col_lens);CHKERRQ(ierr); 319 320 /* store column indices (zero start index) */ 321 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 322 count = 0; 323 for (i=0; i<a->mbs; i++) { 324 for (j=0; j<bs; j++) { 325 for (k=a->i[i]; k<a->i[i+1]; k++) { 326 for (l=0; l<bs; l++) { 327 jj[count++] = bs*a->j[k] + l; 328 } 329 } 330 } 331 } 332 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 333 ierr = PetscFree(jj);CHKERRQ(ierr); 334 335 /* store nonzero values */ 336 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 337 count = 0; 338 for (i=0; i<a->mbs; i++) { 339 for (j=0; j<bs; j++) { 340 for (k=a->i[i]; k<a->i[i+1]; k++) { 341 for (l=0; l<bs; l++) { 342 aa[count++] = a->a[bs2*k + l*bs + j]; 343 } 344 } 345 } 346 } 347 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 348 ierr = PetscFree(aa);CHKERRQ(ierr); 349 350 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 351 if (file) { 352 fprintf(file,"-matload_block_size %d\n",a->bs); 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 359 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 360 { 361 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 362 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 363 char *name; 364 PetscViewerFormat format; 365 366 PetscFunctionBegin; 367 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 368 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 369 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 370 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 371 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 372 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 373 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 374 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 375 for (i=0; i<a->mbs; i++) { 376 for (j=0; j<bs; j++) { 377 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 378 for (k=a->i[i]; k<a->i[i+1]; k++) { 379 for (l=0; l<bs; l++) { 380 #if defined(PETSC_USE_COMPLEX) 381 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 383 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 384 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 385 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 386 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 387 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 388 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } 390 #else 391 if (a->a[bs2*k + l*bs + j] != 0.0) { 392 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 393 } 394 #endif 395 } 396 } 397 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 398 } 399 } 400 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 401 } else { 402 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 403 for (i=0; i<a->mbs; i++) { 404 for (j=0; j<bs; j++) { 405 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 406 for (k=a->i[i]; k<a->i[i+1]; k++) { 407 for (l=0; l<bs; l++) { 408 #if defined(PETSC_USE_COMPLEX) 409 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 410 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 411 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 412 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 413 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 414 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 415 } else { 416 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 420 #endif 421 } 422 } 423 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 424 } 425 } 426 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 427 } 428 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 434 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 435 { 436 Mat A = (Mat) Aa; 437 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 438 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 439 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 440 MatScalar *aa; 441 MPI_Comm comm; 442 PetscViewer viewer; 443 444 PetscFunctionBegin; 445 /* 446 This is nasty. If this is called from an originally parallel matrix 447 then all processes call this,but only the first has the matrix so the 448 rest should return immediately. 449 */ 450 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 451 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 452 if (rank) PetscFunctionReturn(0); 453 454 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 455 456 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 457 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 458 459 /* loop over matrix elements drawing boxes */ 460 color = PETSC_DRAW_BLUE; 461 for (i=0,row=0; i<mbs; i++,row+=bs) { 462 for (j=a->i[i]; j<a->i[i+1]; j++) { 463 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 464 x_l = a->j[j]*bs; x_r = x_l + 1.0; 465 aa = a->a + j*bs2; 466 for (k=0; k<bs; k++) { 467 for (l=0; l<bs; l++) { 468 if (PetscRealPart(*aa++) >= 0.) continue; 469 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 470 } 471 } 472 } 473 } 474 color = PETSC_DRAW_CYAN; 475 for (i=0,row=0; i<mbs; i++,row+=bs) { 476 for (j=a->i[i]; j<a->i[i+1]; j++) { 477 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 478 x_l = a->j[j]*bs; x_r = x_l + 1.0; 479 aa = a->a + j*bs2; 480 for (k=0; k<bs; k++) { 481 for (l=0; l<bs; l++) { 482 if (PetscRealPart(*aa++) != 0.) continue; 483 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 484 } 485 } 486 } 487 } 488 489 color = PETSC_DRAW_RED; 490 for (i=0,row=0; i<mbs; i++,row+=bs) { 491 for (j=a->i[i]; j<a->i[i+1]; j++) { 492 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 493 x_l = a->j[j]*bs; x_r = x_l + 1.0; 494 aa = a->a + j*bs2; 495 for (k=0; k<bs; k++) { 496 for (l=0; l<bs; l++) { 497 if (PetscRealPart(*aa++) <= 0.) continue; 498 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 499 } 500 } 501 } 502 } 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 508 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 509 { 510 int ierr; 511 PetscReal xl,yl,xr,yr,w,h; 512 PetscDraw draw; 513 PetscTruth isnull; 514 515 PetscFunctionBegin; 516 517 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 518 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 519 520 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 521 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 522 xr += w; yr += h; xl = -w; yl = -h; 523 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 524 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 525 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 #undef __FUNCT__ 530 #define __FUNCT__ "MatView_SeqSBAIJ" 531 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 532 { 533 int ierr; 534 PetscTruth issocket,isascii,isbinary,isdraw; 535 536 PetscFunctionBegin; 537 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 538 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 539 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 540 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 541 if (issocket) { 542 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 543 } else if (isascii){ 544 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 545 } else if (isbinary) { 546 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 547 } else if (isdraw) { 548 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 549 } else { 550 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 551 } 552 PetscFunctionReturn(0); 553 } 554 555 556 #undef __FUNCT__ 557 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 558 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 559 { 560 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 561 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 562 int *ai = a->i,*ailen = a->ilen; 563 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 564 MatScalar *ap,*aa = a->a,zero = 0.0; 565 566 PetscFunctionBegin; 567 for (k=0; k<m; k++) { /* loop over rows */ 568 row = im[k]; brow = row/bs; 569 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 570 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 571 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 572 nrow = ailen[brow]; 573 for (l=0; l<n; l++) { /* loop over columns */ 574 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 575 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 576 col = in[l] ; 577 bcol = col/bs; 578 cidx = col%bs; 579 ridx = row%bs; 580 high = nrow; 581 low = 0; /* assume unsorted */ 582 while (high-low > 5) { 583 t = (low+high)/2; 584 if (rp[t] > bcol) high = t; 585 else low = t; 586 } 587 for (i=low; i<high; i++) { 588 if (rp[i] > bcol) break; 589 if (rp[i] == bcol) { 590 *v++ = ap[bs2*i+bs*cidx+ridx]; 591 goto finished; 592 } 593 } 594 *v++ = zero; 595 finished:; 596 } 597 } 598 PetscFunctionReturn(0); 599 } 600 601 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 604 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 605 { 606 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 607 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 608 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 609 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 610 PetscTruth roworiented=a->roworiented; 611 const MatScalar *value = v; 612 MatScalar *ap,*aa = a->a,*bap; 613 614 PetscFunctionBegin; 615 if (roworiented) { 616 stepval = (n-1)*bs; 617 } else { 618 stepval = (m-1)*bs; 619 } 620 for (k=0; k<m; k++) { /* loop over added rows */ 621 row = im[k]; 622 if (row < 0) continue; 623 #if defined(PETSC_USE_BOPT_g) 624 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 625 #endif 626 rp = aj + ai[row]; 627 ap = aa + bs2*ai[row]; 628 rmax = imax[row]; 629 nrow = ailen[row]; 630 low = 0; 631 for (l=0; l<n; l++) { /* loop over added columns */ 632 if (in[l] < 0) continue; 633 col = in[l]; 634 #if defined(PETSC_USE_BOPT_g) 635 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 636 #endif 637 if (col < row) continue; /* ignore lower triangular block */ 638 if (roworiented) { 639 value = v + k*(stepval+bs)*bs + l*bs; 640 } else { 641 value = v + l*(stepval+bs)*bs + k*bs; 642 } 643 if (!sorted) low = 0; high = nrow; 644 while (high-low > 7) { 645 t = (low+high)/2; 646 if (rp[t] > col) high = t; 647 else low = t; 648 } 649 for (i=low; i<high; i++) { 650 if (rp[i] > col) break; 651 if (rp[i] == col) { 652 bap = ap + bs2*i; 653 if (roworiented) { 654 if (is == ADD_VALUES) { 655 for (ii=0; ii<bs; ii++,value+=stepval) { 656 for (jj=ii; jj<bs2; jj+=bs) { 657 bap[jj] += *value++; 658 } 659 } 660 } else { 661 for (ii=0; ii<bs; ii++,value+=stepval) { 662 for (jj=ii; jj<bs2; jj+=bs) { 663 bap[jj] = *value++; 664 } 665 } 666 } 667 } else { 668 if (is == ADD_VALUES) { 669 for (ii=0; ii<bs; ii++,value+=stepval) { 670 for (jj=0; jj<bs; jj++) { 671 *bap++ += *value++; 672 } 673 } 674 } else { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=0; jj<bs; jj++) { 677 *bap++ = *value++; 678 } 679 } 680 } 681 } 682 goto noinsert2; 683 } 684 } 685 if (nonew == 1) goto noinsert2; 686 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 687 if (nrow >= rmax) { 688 /* there is no extra room in row, therefore enlarge */ 689 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 690 MatScalar *new_a; 691 692 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 693 694 /* malloc new storage space */ 695 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 696 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 697 new_j = (int*)(new_a + bs2*new_nz); 698 new_i = new_j + new_nz; 699 700 /* copy over old data into new slots */ 701 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 702 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 703 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 704 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 705 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 706 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 707 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 708 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 709 /* free up old matrix storage */ 710 ierr = PetscFree(a->a);CHKERRQ(ierr); 711 if (!a->singlemalloc) { 712 ierr = PetscFree(a->i);CHKERRQ(ierr); 713 ierr = PetscFree(a->j);CHKERRQ(ierr); 714 } 715 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 716 a->singlemalloc = PETSC_TRUE; 717 718 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 719 rmax = imax[row] = imax[row] + CHUNKSIZE; 720 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 721 a->s_maxnz += bs2*CHUNKSIZE; 722 a->reallocs++; 723 a->s_nz++; 724 } 725 N = nrow++ - 1; 726 /* shift up all the later entries in this row */ 727 for (ii=N; ii>=i; ii--) { 728 rp[ii+1] = rp[ii]; 729 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 730 } 731 if (N >= i) { 732 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 733 } 734 rp[i] = col; 735 bap = ap + bs2*i; 736 if (roworiented) { 737 for (ii=0; ii<bs; ii++,value+=stepval) { 738 for (jj=ii; jj<bs2; jj+=bs) { 739 bap[jj] = *value++; 740 } 741 } 742 } else { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=0; jj<bs; jj++) { 745 *bap++ = *value++; 746 } 747 } 748 } 749 noinsert2:; 750 low = i; 751 } 752 ailen[row] = nrow; 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 759 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 760 { 761 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 762 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 763 int m = A->m,*ip,N,*ailen = a->ilen; 764 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 765 MatScalar *aa = a->a,*ap; 766 767 PetscFunctionBegin; 768 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 769 770 if (m) rmax = ailen[0]; 771 for (i=1; i<mbs; i++) { 772 /* move each row back by the amount of empty slots (fshift) before it*/ 773 fshift += imax[i-1] - ailen[i-1]; 774 rmax = PetscMax(rmax,ailen[i]); 775 if (fshift) { 776 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 777 N = ailen[i]; 778 for (j=0; j<N; j++) { 779 ip[j-fshift] = ip[j]; 780 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 781 } 782 } 783 ai[i] = ai[i-1] + ailen[i-1]; 784 } 785 if (mbs) { 786 fshift += imax[mbs-1] - ailen[mbs-1]; 787 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 788 } 789 /* reset ilen and imax for each row */ 790 for (i=0; i<mbs; i++) { 791 ailen[i] = imax[i] = ai[i+1] - ai[i]; 792 } 793 a->s_nz = ai[mbs]; 794 795 /* diagonals may have moved, reset it */ 796 if (a->diag) { 797 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 798 } 799 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 800 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 801 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 802 a->reallocs); 803 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 804 a->reallocs = 0; 805 A->info.nz_unneeded = (PetscReal)fshift*bs2; 806 807 PetscFunctionReturn(0); 808 } 809 810 /* 811 This function returns an array of flags which indicate the locations of contiguous 812 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 813 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 814 Assume: sizes should be long enough to hold all the values. 815 */ 816 #undef __FUNCT__ 817 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 818 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 819 { 820 int i,j,k,row; 821 PetscTruth flg; 822 823 PetscFunctionBegin; 824 for (i=0,j=0; i<n; j++) { 825 row = idx[i]; 826 if (row%bs!=0) { /* Not the begining of a block */ 827 sizes[j] = 1; 828 i++; 829 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 830 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 831 i++; 832 } else { /* Begining of the block, so check if the complete block exists */ 833 flg = PETSC_TRUE; 834 for (k=1; k<bs; k++) { 835 if (row+k != idx[i+k]) { /* break in the block */ 836 flg = PETSC_FALSE; 837 break; 838 } 839 } 840 if (flg == PETSC_TRUE) { /* No break in the bs */ 841 sizes[j] = bs; 842 i+= bs; 843 } else { 844 sizes[j] = 1; 845 i++; 846 } 847 } 848 } 849 *bs_max = j; 850 PetscFunctionReturn(0); 851 } 852 853 #undef __FUNCT__ 854 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 855 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 856 { 857 PetscFunctionBegin; 858 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 859 } 860 861 /* Only add/insert a(i,j) with i<=j (blocks). 862 Any a(i,j) with i>j input by user is ingored. 863 */ 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 867 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 868 { 869 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 870 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 871 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 872 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 873 int ridx,cidx,bs2=a->bs2,ierr; 874 MatScalar *ap,value,*aa=a->a,*bap; 875 876 PetscFunctionBegin; 877 878 for (k=0; k<m; k++) { /* loop over added rows */ 879 row = im[k]; /* row number */ 880 brow = row/bs; /* block row number */ 881 if (row < 0) continue; 882 #if defined(PETSC_USE_BOPT_g) 883 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 884 #endif 885 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 886 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 887 rmax = imax[brow]; /* maximum space allocated for this row */ 888 nrow = ailen[brow]; /* actual length of this row */ 889 low = 0; 890 891 for (l=0; l<n; l++) { /* loop over added columns */ 892 if (in[l] < 0) continue; 893 #if defined(PETSC_USE_BOPT_g) 894 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 895 #endif 896 col = in[l]; 897 bcol = col/bs; /* block col number */ 898 899 if (brow <= bcol){ 900 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 901 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 902 /* element value a(k,l) */ 903 if (roworiented) { 904 value = v[l + k*n]; 905 } else { 906 value = v[k + l*m]; 907 } 908 909 /* move pointer bap to a(k,l) quickly and add/insert value */ 910 if (!sorted) low = 0; high = nrow; 911 while (high-low > 7) { 912 t = (low+high)/2; 913 if (rp[t] > bcol) high = t; 914 else low = t; 915 } 916 for (i=low; i<high; i++) { 917 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 918 if (rp[i] > bcol) break; 919 if (rp[i] == bcol) { 920 bap = ap + bs2*i + bs*cidx + ridx; 921 if (is == ADD_VALUES) *bap += value; 922 else *bap = value; 923 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 924 if (brow == bcol && ridx < cidx){ 925 bap = ap + bs2*i + bs*ridx + cidx; 926 if (is == ADD_VALUES) *bap += value; 927 else *bap = value; 928 } 929 goto noinsert1; 930 } 931 } 932 933 if (nonew == 1) goto noinsert1; 934 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 935 if (nrow >= rmax) { 936 /* there is no extra room in row, therefore enlarge */ 937 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 938 MatScalar *new_a; 939 940 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 941 942 /* Malloc new storage space */ 943 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 944 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 945 new_j = (int*)(new_a + bs2*new_nz); 946 new_i = new_j + new_nz; 947 948 /* copy over old data into new slots */ 949 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 950 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 951 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 952 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 953 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 954 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 955 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 956 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 957 /* free up old matrix storage */ 958 ierr = PetscFree(a->a);CHKERRQ(ierr); 959 if (!a->singlemalloc) { 960 ierr = PetscFree(a->i);CHKERRQ(ierr); 961 ierr = PetscFree(a->j);CHKERRQ(ierr); 962 } 963 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 964 a->singlemalloc = PETSC_TRUE; 965 966 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 967 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 968 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 969 a->s_maxnz += bs2*CHUNKSIZE; 970 a->reallocs++; 971 a->s_nz++; 972 } 973 974 N = nrow++ - 1; 975 /* shift up all the later entries in this row */ 976 for (ii=N; ii>=i; ii--) { 977 rp[ii+1] = rp[ii]; 978 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 979 } 980 if (N>=i) { 981 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 982 } 983 rp[i] = bcol; 984 ap[bs2*i + bs*cidx + ridx] = value; 985 noinsert1:; 986 low = i; 987 /* } */ 988 } 989 } /* end of if .. if.. */ 990 } /* end of loop over added columns */ 991 ailen[brow] = nrow; 992 } /* end of loop over added rows */ 993 994 PetscFunctionReturn(0); 995 } 996 997 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 998 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 999 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 1000 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1001 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 1002 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1003 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1004 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 1005 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1006 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1007 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1008 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1009 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1010 extern int MatZeroEntries_SeqSBAIJ(Mat); 1011 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1012 1013 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1018 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1019 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1020 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1025 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1026 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1027 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1028 1029 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1030 1031 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1036 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1037 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1038 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1039 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1048 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1049 1050 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1055 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1056 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1057 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1058 1059 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1064 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1065 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1066 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1067 1068 #ifdef HAVE_MatICCFactor 1069 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1072 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1073 { 1074 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1075 Mat outA; 1076 int ierr; 1077 PetscTruth row_identity,col_identity; 1078 1079 PetscFunctionBegin; 1080 /* 1081 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1082 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1083 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1084 if (!row_identity || !col_identity) { 1085 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1086 } 1087 */ 1088 1089 outA = inA; 1090 inA->factor = FACTOR_CHOLESKY; 1091 1092 if (!a->diag) { 1093 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1094 } 1095 /* 1096 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1097 for ILU(0) factorization with natural ordering 1098 */ 1099 switch (a->bs) { 1100 case 1: 1101 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1102 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1103 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1104 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1105 case 2: 1106 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1107 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1108 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1109 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1110 break; 1111 case 3: 1112 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1113 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1114 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1115 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1116 break; 1117 case 4: 1118 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1119 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1120 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1121 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1122 break; 1123 case 5: 1124 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1125 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1126 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1127 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1128 break; 1129 case 6: 1130 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1131 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1132 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1133 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1134 break; 1135 case 7: 1136 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1137 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1138 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1139 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1140 break; 1141 default: 1142 a->row = row; 1143 a->icol = col; 1144 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1145 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1146 1147 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1148 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1149 PetscLogObjectParent(inA,a->icol); 1150 1151 if (!a->solve_work) { 1152 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1153 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1154 } 1155 } 1156 1157 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1158 1159 PetscFunctionReturn(0); 1160 } 1161 #endif 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1165 int MatPrintHelp_SeqSBAIJ(Mat A) 1166 { 1167 static PetscTruth called = PETSC_FALSE; 1168 MPI_Comm comm = A->comm; 1169 int ierr; 1170 1171 PetscFunctionBegin; 1172 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1173 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1174 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 EXTERN_C_BEGIN 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1181 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1182 { 1183 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1184 int i,nz,n; 1185 1186 PetscFunctionBegin; 1187 nz = baij->s_maxnz; 1188 n = mat->n; 1189 for (i=0; i<nz; i++) { 1190 baij->j[i] = indices[i]; 1191 } 1192 baij->s_nz = nz; 1193 for (i=0; i<n; i++) { 1194 baij->ilen[i] = baij->imax[i]; 1195 } 1196 1197 PetscFunctionReturn(0); 1198 } 1199 EXTERN_C_END 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1203 /*@ 1204 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1205 in the matrix. 1206 1207 Input Parameters: 1208 + mat - the SeqSBAIJ matrix 1209 - indices - the column indices 1210 1211 Level: advanced 1212 1213 Notes: 1214 This can be called if you have precomputed the nonzero structure of the 1215 matrix and want to provide it to the matrix object to improve the performance 1216 of the MatSetValues() operation. 1217 1218 You MUST have set the correct numbers of nonzeros per row in the call to 1219 MatCreateSeqSBAIJ(). 1220 1221 MUST be called before any calls to MatSetValues() 1222 1223 .seealso: MatCreateSeqSBAIJ 1224 @*/ 1225 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1226 { 1227 int ierr,(*f)(Mat,int *); 1228 1229 PetscFunctionBegin; 1230 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1231 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1232 if (f) { 1233 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1234 } else { 1235 SETERRQ(1,"Wrong type of matrix to set column indices"); 1236 } 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1242 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1243 { 1244 int ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1253 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1254 { 1255 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1256 PetscFunctionBegin; 1257 *array = a->a; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1263 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1264 { 1265 PetscFunctionBegin; 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #include "petscblaslapack.h" 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1272 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1273 { 1274 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1275 int ierr,one=1,i,bs=y->bs,bs2,j; 1276 1277 PetscFunctionBegin; 1278 if (str == SAME_NONZERO_PATTERN) { 1279 BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1280 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1281 if (y->xtoy && y->XtoY != X) { 1282 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1283 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1284 } 1285 if (!y->xtoy) { /* get xtoy */ 1286 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1287 y->XtoY = X; 1288 } 1289 bs2 = bs*bs; 1290 for (i=0; i<x->s_nz; i++) { 1291 j = 0; 1292 while (j < bs2){ 1293 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1294 j++; 1295 } 1296 } 1297 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1298 } else { 1299 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1306 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1307 { 1308 PetscFunctionBegin; 1309 *flg = PETSC_TRUE; 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1315 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1316 { 1317 PetscFunctionBegin; 1318 *flg = PETSC_TRUE; 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1324 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1325 { 1326 PetscFunctionBegin; 1327 *flg = PETSC_FALSE; 1328 PetscFunctionReturn(0); 1329 } 1330 1331 /* -------------------------------------------------------------------*/ 1332 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1333 MatGetRow_SeqSBAIJ, 1334 MatRestoreRow_SeqSBAIJ, 1335 MatMult_SeqSBAIJ_N, 1336 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1337 MatMultTranspose_SeqSBAIJ, 1338 MatMultTransposeAdd_SeqSBAIJ, 1339 MatSolve_SeqSBAIJ_N, 1340 0, 1341 0, 1342 /*10*/ 0, 1343 0, 1344 MatCholeskyFactor_SeqSBAIJ, 1345 MatRelax_SeqSBAIJ, 1346 MatTranspose_SeqSBAIJ, 1347 /*15*/ MatGetInfo_SeqSBAIJ, 1348 MatEqual_SeqSBAIJ, 1349 MatGetDiagonal_SeqSBAIJ, 1350 MatDiagonalScale_SeqSBAIJ, 1351 MatNorm_SeqSBAIJ, 1352 /*20*/ 0, 1353 MatAssemblyEnd_SeqSBAIJ, 1354 0, 1355 MatSetOption_SeqSBAIJ, 1356 MatZeroEntries_SeqSBAIJ, 1357 /*25*/ MatZeroRows_SeqSBAIJ, 1358 0, 1359 0, 1360 MatCholeskyFactorSymbolic_SeqSBAIJ, 1361 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1362 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1363 0, 1364 MatICCFactorSymbolic_SeqSBAIJ, 1365 MatGetArray_SeqSBAIJ, 1366 MatRestoreArray_SeqSBAIJ, 1367 /*35*/ MatDuplicate_SeqSBAIJ, 1368 0, 1369 0, 1370 0, 1371 0, 1372 /*40*/ MatAXPY_SeqSBAIJ, 1373 MatGetSubMatrices_SeqSBAIJ, 1374 MatIncreaseOverlap_SeqSBAIJ, 1375 MatGetValues_SeqSBAIJ, 1376 0, 1377 /*45*/ MatPrintHelp_SeqSBAIJ, 1378 MatScale_SeqSBAIJ, 1379 0, 1380 0, 1381 0, 1382 /*50*/ MatGetBlockSize_SeqSBAIJ, 1383 MatGetRowIJ_SeqSBAIJ, 1384 MatRestoreRowIJ_SeqSBAIJ, 1385 0, 1386 0, 1387 /*55*/ 0, 1388 0, 1389 0, 1390 0, 1391 MatSetValuesBlocked_SeqSBAIJ, 1392 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1393 0, 1394 0, 1395 MatGetPetscMaps_Petsc, 1396 0, 1397 /*65*/ 0, 1398 0, 1399 0, 1400 0, 1401 0, 1402 /*70*/ MatGetRowMax_SeqSBAIJ, 1403 0, 1404 0, 1405 0, 1406 0, 1407 /*75*/ 0, 1408 0, 1409 0, 1410 0, 1411 0, 1412 /*80*/ 0, 1413 0, 1414 0, 1415 #if !defined(PETSC_USE_COMPLEX) 1416 MatGetInertia_SeqSBAIJ, 1417 #else 1418 0, 1419 #endif 1420 /*85*/ MatLoad_SeqSBAIJ, 1421 MatIsSymmetric_SeqSBAIJ, 1422 MatIsStructurallySymmetric_SeqSBAIJ, 1423 MatIsHermitian_SeqSBAIJ 1424 }; 1425 1426 EXTERN_C_BEGIN 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1429 int MatStoreValues_SeqSBAIJ(Mat mat) 1430 { 1431 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1432 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1433 int ierr; 1434 1435 PetscFunctionBegin; 1436 if (aij->nonew != 1) { 1437 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1438 } 1439 1440 /* allocate space for values if not already there */ 1441 if (!aij->saved_values) { 1442 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1443 } 1444 1445 /* copy values over */ 1446 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 EXTERN_C_END 1450 1451 EXTERN_C_BEGIN 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1454 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1455 { 1456 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1457 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1458 1459 PetscFunctionBegin; 1460 if (aij->nonew != 1) { 1461 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1462 } 1463 if (!aij->saved_values) { 1464 SETERRQ(1,"Must call MatStoreValues(A);first"); 1465 } 1466 1467 /* copy values over */ 1468 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 EXTERN_C_END 1472 1473 EXTERN_C_BEGIN 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1476 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1477 { 1478 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1479 int i,len,ierr,mbs,bs2; 1480 PetscTruth flg; 1481 int s_nz; 1482 1483 PetscFunctionBegin; 1484 B->preallocated = PETSC_TRUE; 1485 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1486 mbs = B->m/bs; 1487 bs2 = bs*bs; 1488 1489 if (mbs*bs != B->m) { 1490 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1491 } 1492 1493 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1494 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1495 if (nnz) { 1496 for (i=0; i<mbs; i++) { 1497 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1498 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1499 } 1500 } 1501 1502 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1503 if (!flg) { 1504 switch (bs) { 1505 case 1: 1506 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1507 B->ops->solve = MatSolve_SeqSBAIJ_1; 1508 B->ops->solves = MatSolves_SeqSBAIJ_1; 1509 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1510 B->ops->mult = MatMult_SeqSBAIJ_1; 1511 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1512 break; 1513 case 2: 1514 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1515 B->ops->solve = MatSolve_SeqSBAIJ_2; 1516 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1517 B->ops->mult = MatMult_SeqSBAIJ_2; 1518 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1519 break; 1520 case 3: 1521 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1522 B->ops->solve = MatSolve_SeqSBAIJ_3; 1523 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1524 B->ops->mult = MatMult_SeqSBAIJ_3; 1525 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1526 break; 1527 case 4: 1528 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1529 B->ops->solve = MatSolve_SeqSBAIJ_4; 1530 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1531 B->ops->mult = MatMult_SeqSBAIJ_4; 1532 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1533 break; 1534 case 5: 1535 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1536 B->ops->solve = MatSolve_SeqSBAIJ_5; 1537 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1538 B->ops->mult = MatMult_SeqSBAIJ_5; 1539 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1540 break; 1541 case 6: 1542 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1543 B->ops->solve = MatSolve_SeqSBAIJ_6; 1544 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1545 B->ops->mult = MatMult_SeqSBAIJ_6; 1546 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1547 break; 1548 case 7: 1549 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1550 B->ops->solve = MatSolve_SeqSBAIJ_7; 1551 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1552 B->ops->mult = MatMult_SeqSBAIJ_7; 1553 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1554 break; 1555 } 1556 } 1557 1558 b->mbs = mbs; 1559 b->nbs = mbs; 1560 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1561 if (!nnz) { 1562 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1563 else if (nz <= 0) nz = 1; 1564 for (i=0; i<mbs; i++) { 1565 b->imax[i] = nz; 1566 } 1567 nz = nz*mbs; /* total nz */ 1568 } else { 1569 nz = 0; 1570 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1571 } 1572 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1573 s_nz = nz; 1574 1575 /* allocate the matrix space */ 1576 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1577 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1578 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1579 b->j = (int*)(b->a + s_nz*bs2); 1580 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1581 b->i = b->j + s_nz; 1582 b->singlemalloc = PETSC_TRUE; 1583 1584 /* pointer to beginning of each row */ 1585 b->i[0] = 0; 1586 for (i=1; i<mbs+1; i++) { 1587 b->i[i] = b->i[i-1] + b->imax[i-1]; 1588 } 1589 1590 /* b->ilen will count nonzeros in each block row so far. */ 1591 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1592 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1593 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1594 1595 b->bs = bs; 1596 b->bs2 = bs2; 1597 b->s_nz = 0; 1598 b->s_maxnz = s_nz*bs2; 1599 1600 b->inew = 0; 1601 b->jnew = 0; 1602 b->anew = 0; 1603 b->a2anew = 0; 1604 b->permute = PETSC_FALSE; 1605 PetscFunctionReturn(0); 1606 } 1607 EXTERN_C_END 1608 1609 /*MC 1610 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1611 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1612 1613 Options Database Keys: 1614 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1615 1616 Level: beginner 1617 1618 .seealso: MatCreateSeqSBAIJ 1619 M*/ 1620 1621 EXTERN_C_BEGIN 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1624 int MatCreate_SeqSBAIJ(Mat B) 1625 { 1626 Mat_SeqSBAIJ *b; 1627 int ierr,size; 1628 1629 PetscFunctionBegin; 1630 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1631 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1632 B->m = B->M = PetscMax(B->m,B->M); 1633 B->n = B->N = PetscMax(B->n,B->N); 1634 1635 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1636 B->data = (void*)b; 1637 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1638 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1639 B->ops->destroy = MatDestroy_SeqSBAIJ; 1640 B->ops->view = MatView_SeqSBAIJ; 1641 B->factor = 0; 1642 B->lupivotthreshold = 1.0; 1643 B->mapping = 0; 1644 b->row = 0; 1645 b->icol = 0; 1646 b->reallocs = 0; 1647 b->saved_values = 0; 1648 1649 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1650 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1651 1652 b->sorted = PETSC_FALSE; 1653 b->roworiented = PETSC_TRUE; 1654 b->nonew = 0; 1655 b->diag = 0; 1656 b->solve_work = 0; 1657 b->mult_work = 0; 1658 B->spptr = 0; 1659 b->keepzeroedrows = PETSC_FALSE; 1660 b->xtoy = 0; 1661 b->XtoY = 0; 1662 1663 b->inew = 0; 1664 b->jnew = 0; 1665 b->anew = 0; 1666 b->a2anew = 0; 1667 b->permute = PETSC_FALSE; 1668 1669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1670 "MatStoreValues_SeqSBAIJ", 1671 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1672 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1673 "MatRetrieveValues_SeqSBAIJ", 1674 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1675 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1676 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1677 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1678 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1679 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1680 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1681 PetscFunctionReturn(0); 1682 } 1683 EXTERN_C_END 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1687 /*@C 1688 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1689 compressed row) format. For good matrix assembly performance the 1690 user should preallocate the matrix storage by setting the parameter nz 1691 (or the array nnz). By setting these parameters accurately, performance 1692 during matrix assembly can be increased by more than a factor of 50. 1693 1694 Collective on Mat 1695 1696 Input Parameters: 1697 + A - the symmetric matrix 1698 . bs - size of block 1699 . nz - number of block nonzeros per block row (same for all rows) 1700 - nnz - array containing the number of block nonzeros in the upper triangular plus 1701 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1702 1703 Options Database Keys: 1704 . -mat_no_unroll - uses code that does not unroll the loops in the 1705 block calculations (much slower) 1706 . -mat_block_size - size of the blocks to use 1707 1708 Level: intermediate 1709 1710 Notes: 1711 Specify the preallocated storage with either nz or nnz (not both). 1712 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1713 allocation. For additional details, see the users manual chapter on 1714 matrices. 1715 1716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1717 @*/ 1718 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1719 int ierr,(*f)(Mat,int,int,const int[]); 1720 1721 PetscFunctionBegin; 1722 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1723 if (f) { 1724 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1725 } 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatCreateSeqSBAIJ" 1731 /*@C 1732 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1733 compressed row) format. For good matrix assembly performance the 1734 user should preallocate the matrix storage by setting the parameter nz 1735 (or the array nnz). By setting these parameters accurately, performance 1736 during matrix assembly can be increased by more than a factor of 50. 1737 1738 Collective on MPI_Comm 1739 1740 Input Parameters: 1741 + comm - MPI communicator, set to PETSC_COMM_SELF 1742 . bs - size of block 1743 . m - number of rows, or number of columns 1744 . nz - number of block nonzeros per block row (same for all rows) 1745 - nnz - array containing the number of block nonzeros in the upper triangular plus 1746 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1747 1748 Output Parameter: 1749 . A - the symmetric matrix 1750 1751 Options Database Keys: 1752 . -mat_no_unroll - uses code that does not unroll the loops in the 1753 block calculations (much slower) 1754 . -mat_block_size - size of the blocks to use 1755 1756 Level: intermediate 1757 1758 Notes: 1759 1760 Specify the preallocated storage with either nz or nnz (not both). 1761 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1762 allocation. For additional details, see the users manual chapter on 1763 matrices. 1764 1765 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1766 @*/ 1767 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1768 { 1769 int ierr; 1770 1771 PetscFunctionBegin; 1772 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1773 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1774 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1780 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1781 { 1782 Mat C; 1783 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1784 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1785 1786 PetscFunctionBegin; 1787 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1788 1789 *B = 0; 1790 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1791 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1792 c = (Mat_SeqSBAIJ*)C->data; 1793 1794 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1795 C->preallocated = PETSC_TRUE; 1796 C->factor = A->factor; 1797 c->row = 0; 1798 c->icol = 0; 1799 c->saved_values = 0; 1800 c->keepzeroedrows = a->keepzeroedrows; 1801 C->assembled = PETSC_TRUE; 1802 1803 c->bs = a->bs; 1804 c->bs2 = a->bs2; 1805 c->mbs = a->mbs; 1806 c->nbs = a->nbs; 1807 1808 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1809 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1810 for (i=0; i<mbs; i++) { 1811 c->imax[i] = a->imax[i]; 1812 c->ilen[i] = a->ilen[i]; 1813 } 1814 1815 /* allocate the matrix space */ 1816 c->singlemalloc = PETSC_TRUE; 1817 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1818 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1819 c->j = (int*)(c->a + nz*bs2); 1820 c->i = c->j + nz; 1821 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1822 if (mbs > 0) { 1823 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1824 if (cpvalues == MAT_COPY_VALUES) { 1825 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1826 } else { 1827 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1828 } 1829 } 1830 1831 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1832 c->sorted = a->sorted; 1833 c->roworiented = a->roworiented; 1834 c->nonew = a->nonew; 1835 1836 if (a->diag) { 1837 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1838 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1839 for (i=0; i<mbs; i++) { 1840 c->diag[i] = a->diag[i]; 1841 } 1842 } else c->diag = 0; 1843 c->s_nz = a->s_nz; 1844 c->s_maxnz = a->s_maxnz; 1845 c->solve_work = 0; 1846 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1847 c->mult_work = 0; 1848 *B = C; 1849 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1855 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1856 { 1857 Mat_SeqSBAIJ *a; 1858 Mat B; 1859 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1860 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1861 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1862 int *masked,nmask,tmp,bs2,ishift; 1863 PetscScalar *aa; 1864 MPI_Comm comm = ((PetscObject)viewer)->comm; 1865 1866 PetscFunctionBegin; 1867 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1868 bs2 = bs*bs; 1869 1870 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1871 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1872 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1873 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1874 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1875 M = header[1]; N = header[2]; nz = header[3]; 1876 1877 if (header[3] < 0) { 1878 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1879 } 1880 1881 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1882 1883 /* 1884 This code adds extra rows to make sure the number of rows is 1885 divisible by the blocksize 1886 */ 1887 mbs = M/bs; 1888 extra_rows = bs - M + bs*(mbs); 1889 if (extra_rows == bs) extra_rows = 0; 1890 else mbs++; 1891 if (extra_rows) { 1892 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1893 } 1894 1895 /* read in row lengths */ 1896 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1897 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1898 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1899 1900 /* read in column indices */ 1901 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1902 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1903 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1904 1905 /* loop over row lengths determining block row lengths */ 1906 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1907 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1908 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1909 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1910 masked = mask + mbs; 1911 rowcount = 0; nzcount = 0; 1912 for (i=0; i<mbs; i++) { 1913 nmask = 0; 1914 for (j=0; j<bs; j++) { 1915 kmax = rowlengths[rowcount]; 1916 for (k=0; k<kmax; k++) { 1917 tmp = jj[nzcount++]/bs; /* block col. index */ 1918 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1919 } 1920 rowcount++; 1921 } 1922 s_browlengths[i] += nmask; 1923 1924 /* zero out the mask elements we set */ 1925 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1926 } 1927 1928 /* create our matrix */ 1929 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1930 ierr = MatSetType(B,type);CHKERRQ(ierr); 1931 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1932 a = (Mat_SeqSBAIJ*)B->data; 1933 1934 /* set matrix "i" values */ 1935 a->i[0] = 0; 1936 for (i=1; i<= mbs; i++) { 1937 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1938 a->ilen[i-1] = s_browlengths[i-1]; 1939 } 1940 a->s_nz = a->i[mbs]; 1941 1942 /* read in nonzero values */ 1943 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1944 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1945 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1946 1947 /* set "a" and "j" values into matrix */ 1948 nzcount = 0; jcount = 0; 1949 for (i=0; i<mbs; i++) { 1950 nzcountb = nzcount; 1951 nmask = 0; 1952 for (j=0; j<bs; j++) { 1953 kmax = rowlengths[i*bs+j]; 1954 for (k=0; k<kmax; k++) { 1955 tmp = jj[nzcount++]/bs; /* block col. index */ 1956 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1957 } 1958 } 1959 /* sort the masked values */ 1960 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1961 1962 /* set "j" values into matrix */ 1963 maskcount = 1; 1964 for (j=0; j<nmask; j++) { 1965 a->j[jcount++] = masked[j]; 1966 mask[masked[j]] = maskcount++; 1967 } 1968 1969 /* set "a" values into matrix */ 1970 ishift = bs2*a->i[i]; 1971 for (j=0; j<bs; j++) { 1972 kmax = rowlengths[i*bs+j]; 1973 for (k=0; k<kmax; k++) { 1974 tmp = jj[nzcountb]/bs ; /* block col. index */ 1975 if (tmp >= i){ 1976 block = mask[tmp] - 1; 1977 point = jj[nzcountb] - bs*tmp; 1978 idx = ishift + bs2*block + j + bs*point; 1979 a->a[idx] = aa[nzcountb]; 1980 } 1981 nzcountb++; 1982 } 1983 } 1984 /* zero out the mask elements we set */ 1985 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1986 } 1987 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1988 1989 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1990 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1991 ierr = PetscFree(aa);CHKERRQ(ierr); 1992 ierr = PetscFree(jj);CHKERRQ(ierr); 1993 ierr = PetscFree(mask);CHKERRQ(ierr); 1994 1995 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1996 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1997 ierr = MatView_Private(B);CHKERRQ(ierr); 1998 *A = B; 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatRelax_SeqSBAIJ" 2004 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2005 { 2006 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2007 MatScalar *aa=a->a,*v,*v1; 2008 PetscScalar *x,*b,*t,sum,d; 2009 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 2010 int nz,nz1,*vj,*vj1,i; 2011 2012 PetscFunctionBegin; 2013 its = its*lits; 2014 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2015 2016 if (bs > 1) 2017 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2018 2019 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2020 if (xx != bb) { 2021 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2022 } else { 2023 b = x; 2024 } 2025 2026 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 2027 2028 if (flag & SOR_ZERO_INITIAL_GUESS) { 2029 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2030 for (i=0; i<m; i++) 2031 t[i] = b[i]; 2032 2033 for (i=0; i<m; i++){ 2034 d = *(aa + ai[i]); /* diag[i] */ 2035 v = aa + ai[i] + 1; 2036 vj = aj + ai[i] + 1; 2037 nz = ai[i+1] - ai[i] - 1; 2038 x[i] = omega*t[i]/d; 2039 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2040 PetscLogFlops(2*nz-1); 2041 } 2042 } 2043 2044 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2045 for (i=0; i<m; i++) 2046 t[i] = b[i]; 2047 2048 for (i=0; i<m-1; i++){ /* update rhs */ 2049 v = aa + ai[i] + 1; 2050 vj = aj + ai[i] + 1; 2051 nz = ai[i+1] - ai[i] - 1; 2052 while (nz--) t[*vj++] -= x[i]*(*v++); 2053 PetscLogFlops(2*nz-1); 2054 } 2055 for (i=m-1; i>=0; i--){ 2056 d = *(aa + ai[i]); 2057 v = aa + ai[i] + 1; 2058 vj = aj + ai[i] + 1; 2059 nz = ai[i+1] - ai[i] - 1; 2060 sum = t[i]; 2061 while (nz--) sum -= x[*vj++]*(*v++); 2062 PetscLogFlops(2*nz-1); 2063 x[i] = (1-omega)*x[i] + omega*sum/d; 2064 } 2065 } 2066 its--; 2067 } 2068 2069 while (its--) { 2070 /* 2071 forward sweep: 2072 for i=0,...,m-1: 2073 sum[i] = (b[i] - U(i,:)x )/d[i]; 2074 x[i] = (1-omega)x[i] + omega*sum[i]; 2075 b = b - x[i]*U^T(i,:); 2076 2077 */ 2078 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2079 for (i=0; i<m; i++) 2080 t[i] = b[i]; 2081 2082 for (i=0; i<m; i++){ 2083 d = *(aa + ai[i]); /* diag[i] */ 2084 v = aa + ai[i] + 1; v1=v; 2085 vj = aj + ai[i] + 1; vj1=vj; 2086 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2087 sum = t[i]; 2088 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2089 x[i] = (1-omega)*x[i] + omega*sum/d; 2090 while (nz--) t[*vj++] -= x[i]*(*v++); 2091 PetscLogFlops(4*nz-2); 2092 } 2093 } 2094 2095 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2096 /* 2097 backward sweep: 2098 b = b - x[i]*U^T(i,:), i=0,...,n-2 2099 for i=m-1,...,0: 2100 sum[i] = (b[i] - U(i,:)x )/d[i]; 2101 x[i] = (1-omega)x[i] + omega*sum[i]; 2102 */ 2103 for (i=0; i<m; i++) 2104 t[i] = b[i]; 2105 2106 for (i=0; i<m-1; i++){ /* update rhs */ 2107 v = aa + ai[i] + 1; 2108 vj = aj + ai[i] + 1; 2109 nz = ai[i+1] - ai[i] - 1; 2110 while (nz--) t[*vj++] -= x[i]*(*v++); 2111 PetscLogFlops(2*nz-1); 2112 } 2113 for (i=m-1; i>=0; i--){ 2114 d = *(aa + ai[i]); 2115 v = aa + ai[i] + 1; 2116 vj = aj + ai[i] + 1; 2117 nz = ai[i+1] - ai[i] - 1; 2118 sum = t[i]; 2119 while (nz--) sum -= x[*vj++]*(*v++); 2120 PetscLogFlops(2*nz-1); 2121 x[i] = (1-omega)*x[i] + omega*sum/d; 2122 } 2123 } 2124 } 2125 2126 ierr = PetscFree(t); CHKERRQ(ierr); 2127 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2128 if (bb != xx) { 2129 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2130 } 2131 2132 PetscFunctionReturn(0); 2133 } 2134 2135 2136 2137 2138 2139 2140