1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i; 40 41 PetscFunctionBegin; 42 if (!a->diag) { 43 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 } 45 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 if (!blockcompressed) { 61 /* malloc & create the natural set of indices */ 62 ierr = PetscMalloc2((n+1)*bs,PetscInt,*ia,nz*bs,PetscInt,*ja);CHKERRQ(ierr); 63 for (i=0; i<n+1; i++) { 64 for (j=0; j<bs; j++) { 65 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 66 } 67 } 68 for (i=0; i<nz; i++) { 69 for (j=0; j<bs; j++) { 70 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 71 } 72 } 73 } else { /* blockcompressed */ 74 if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 for (i=0; i<nz; i++) a->j[i]++; 77 for (i=0; i<n+1; i++) a->i[i]++; 78 } 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 88 { 89 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 90 PetscInt i,n = a->mbs,nz = a->i[n]; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 96 if (!blockcompressed) { 97 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { /* blockcompressed */ 99 for (i=0; i<nz; i++) a->j[i]--; 100 for (i=0; i<n+1; i++) a->i[i]--; 101 } 102 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 116 #endif 117 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 118 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 119 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 120 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 121 ierr = PetscFree(a->diag);CHKERRQ(ierr); 122 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 123 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 124 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 125 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 126 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 127 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 128 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 129 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 ierr = PetscFree(a);CHKERRQ(ierr); 132 133 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 146 { 147 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148 PetscErrorCode ierr; 149 150 PetscFunctionBegin; 151 switch (op) { 152 case MAT_ROW_ORIENTED: 153 a->roworiented = PETSC_TRUE; 154 break; 155 case MAT_COLUMN_ORIENTED: 156 a->roworiented = PETSC_FALSE; 157 break; 158 case MAT_COLUMNS_SORTED: 159 a->sorted = PETSC_TRUE; 160 break; 161 case MAT_COLUMNS_UNSORTED: 162 a->sorted = PETSC_FALSE; 163 break; 164 case MAT_KEEP_ZEROED_ROWS: 165 a->keepzeroedrows = PETSC_TRUE; 166 break; 167 case MAT_NO_NEW_NONZERO_LOCATIONS: 168 a->nonew = 1; 169 break; 170 case MAT_NEW_NONZERO_LOCATION_ERR: 171 a->nonew = -1; 172 break; 173 case MAT_NEW_NONZERO_ALLOCATION_ERR: 174 a->nonew = -2; 175 break; 176 case MAT_YES_NEW_NONZERO_LOCATIONS: 177 a->nonew = 0; 178 break; 179 case MAT_ROWS_SORTED: 180 case MAT_ROWS_UNSORTED: 181 case MAT_YES_NEW_DIAGONALS: 182 case MAT_IGNORE_OFF_PROC_ENTRIES: 183 case MAT_USE_HASH_TABLE: 184 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 185 break; 186 case MAT_NO_NEW_DIAGONALS: 187 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 188 case MAT_NOT_SYMMETRIC: 189 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 190 case MAT_HERMITIAN: 191 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 192 case MAT_SYMMETRIC: 193 case MAT_STRUCTURALLY_SYMMETRIC: 194 case MAT_NOT_HERMITIAN: 195 case MAT_SYMMETRY_ETERNAL: 196 case MAT_NOT_SYMMETRY_ETERNAL: 197 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 198 break; 199 case MAT_IGNORE_LOWER_TRIANGULAR: 200 a->ignore_ltriangular = PETSC_TRUE; 201 break; 202 case MAT_ERROR_LOWER_TRIANGULAR: 203 a->ignore_ltriangular = PETSC_FALSE; 204 break; 205 case MAT_GETROW_UPPERTRIANGULAR: 206 a->getrow_utriangular = PETSC_TRUE; 207 break; 208 default: 209 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 210 } 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 216 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 217 { 218 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 221 MatScalar *aa,*aa_i; 222 PetscScalar *v_i; 223 224 PetscFunctionBegin; 225 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()"); 226 /* Get the upper triangular part of the row */ 227 bs = A->rmap.bs; 228 ai = a->i; 229 aj = a->j; 230 aa = a->a; 231 bs2 = a->bs2; 232 233 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 234 235 bn = row/bs; /* Block number */ 236 bp = row % bs; /* Block position */ 237 M = ai[bn+1] - ai[bn]; 238 *ncols = bs*M; 239 240 if (v) { 241 *v = 0; 242 if (*ncols) { 243 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 244 for (i=0; i<M; i++) { /* for each block in the block row */ 245 v_i = *v + i*bs; 246 aa_i = aa + bs2*(ai[bn] + i); 247 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 248 } 249 } 250 } 251 252 if (cols) { 253 *cols = 0; 254 if (*ncols) { 255 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 256 for (i=0; i<M; i++) { /* for each block in the block row */ 257 cols_i = *cols + i*bs; 258 itmp = bs*aj[ai[bn] + i]; 259 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 260 } 261 } 262 } 263 264 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 265 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 266 #ifdef column_search 267 v_i = *v + M*bs; 268 cols_i = *cols + M*bs; 269 for (i=0; i<bn; i++){ /* for each block row */ 270 M = ai[i+1] - ai[i]; 271 for (j=0; j<M; j++){ 272 itmp = aj[ai[i] + j]; /* block column value */ 273 if (itmp == bn){ 274 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 275 for (k=0; k<bs; k++) { 276 *cols_i++ = i*bs+k; 277 *v_i++ = aa_i[k]; 278 } 279 *ncols += bs; 280 break; 281 } 282 } 283 } 284 #endif 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 290 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 291 { 292 PetscErrorCode ierr; 293 294 PetscFunctionBegin; 295 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 296 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 302 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 303 { 304 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 305 306 PetscFunctionBegin; 307 a->getrow_utriangular = PETSC_TRUE; 308 PetscFunctionReturn(0); 309 } 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 312 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 313 { 314 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 315 316 PetscFunctionBegin; 317 a->getrow_utriangular = PETSC_FALSE; 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 323 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 324 { 325 PetscErrorCode ierr; 326 PetscFunctionBegin; 327 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 333 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 334 { 335 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 336 PetscErrorCode ierr; 337 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 338 const char *name; 339 PetscViewerFormat format; 340 341 PetscFunctionBegin; 342 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 343 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 344 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 345 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 346 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 347 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 348 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 349 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 350 for (i=0; i<a->mbs; i++) { 351 for (j=0; j<bs; j++) { 352 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 353 for (k=a->i[i]; k<a->i[i+1]; k++) { 354 for (l=0; l<bs; l++) { 355 #if defined(PETSC_USE_COMPLEX) 356 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 357 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 358 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 359 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 360 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 361 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 362 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 363 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 364 } 365 #else 366 if (a->a[bs2*k + l*bs + j] != 0.0) { 367 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 368 } 369 #endif 370 } 371 } 372 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 373 } 374 } 375 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 376 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 377 PetscFunctionReturn(0); 378 } else { 379 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 380 for (i=0; i<a->mbs; i++) { 381 for (j=0; j<bs; j++) { 382 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 383 for (k=a->i[i]; k<a->i[i+1]; k++) { 384 for (l=0; l<bs; l++) { 385 #if defined(PETSC_USE_COMPLEX) 386 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 387 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 388 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 390 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 391 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 392 } else { 393 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 394 } 395 #else 396 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 397 #endif 398 } 399 } 400 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 401 } 402 } 403 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 404 } 405 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 411 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 412 { 413 Mat A = (Mat) Aa; 414 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 415 PetscErrorCode ierr; 416 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 417 PetscMPIInt rank; 418 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 419 MatScalar *aa; 420 MPI_Comm comm; 421 PetscViewer viewer; 422 423 PetscFunctionBegin; 424 /* 425 This is nasty. If this is called from an originally parallel matrix 426 then all processes call this,but only the first has the matrix so the 427 rest should return immediately. 428 */ 429 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 430 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 431 if (rank) PetscFunctionReturn(0); 432 433 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 434 435 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 436 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 437 438 /* loop over matrix elements drawing boxes */ 439 color = PETSC_DRAW_BLUE; 440 for (i=0,row=0; i<mbs; i++,row+=bs) { 441 for (j=a->i[i]; j<a->i[i+1]; j++) { 442 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 443 x_l = a->j[j]*bs; x_r = x_l + 1.0; 444 aa = a->a + j*bs2; 445 for (k=0; k<bs; k++) { 446 for (l=0; l<bs; l++) { 447 if (PetscRealPart(*aa++) >= 0.) continue; 448 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 color = PETSC_DRAW_CYAN; 454 for (i=0,row=0; i<mbs; i++,row+=bs) { 455 for (j=a->i[i]; j<a->i[i+1]; j++) { 456 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 457 x_l = a->j[j]*bs; x_r = x_l + 1.0; 458 aa = a->a + j*bs2; 459 for (k=0; k<bs; k++) { 460 for (l=0; l<bs; l++) { 461 if (PetscRealPart(*aa++) != 0.) continue; 462 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 463 } 464 } 465 } 466 } 467 468 color = PETSC_DRAW_RED; 469 for (i=0,row=0; i<mbs; i++,row+=bs) { 470 for (j=a->i[i]; j<a->i[i+1]; j++) { 471 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 472 x_l = a->j[j]*bs; x_r = x_l + 1.0; 473 aa = a->a + j*bs2; 474 for (k=0; k<bs; k++) { 475 for (l=0; l<bs; l++) { 476 if (PetscRealPart(*aa++) <= 0.) continue; 477 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 478 } 479 } 480 } 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 487 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 488 { 489 PetscErrorCode ierr; 490 PetscReal xl,yl,xr,yr,w,h; 491 PetscDraw draw; 492 PetscTruth isnull; 493 494 PetscFunctionBegin; 495 496 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 497 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 498 499 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 500 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 501 xr += w; yr += h; xl = -w; yl = -h; 502 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 503 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 504 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatView_SeqSBAIJ" 510 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 511 { 512 PetscErrorCode ierr; 513 PetscTruth iascii,isdraw; 514 515 PetscFunctionBegin; 516 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 517 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 518 if (iascii){ 519 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 520 } else if (isdraw) { 521 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 522 } else { 523 Mat B; 524 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 525 ierr = MatView(B,viewer);CHKERRQ(ierr); 526 ierr = MatDestroy(B);CHKERRQ(ierr); 527 } 528 PetscFunctionReturn(0); 529 } 530 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 534 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 535 { 536 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 537 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 538 PetscInt *ai = a->i,*ailen = a->ilen; 539 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 540 MatScalar *ap,*aa = a->a,zero = 0.0; 541 542 PetscFunctionBegin; 543 for (k=0; k<m; k++) { /* loop over rows */ 544 row = im[k]; brow = row/bs; 545 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 546 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 547 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 548 nrow = ailen[brow]; 549 for (l=0; l<n; l++) { /* loop over columns */ 550 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 551 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 552 col = in[l] ; 553 bcol = col/bs; 554 cidx = col%bs; 555 ridx = row%bs; 556 high = nrow; 557 low = 0; /* assume unsorted */ 558 while (high-low > 5) { 559 t = (low+high)/2; 560 if (rp[t] > bcol) high = t; 561 else low = t; 562 } 563 for (i=low; i<high; i++) { 564 if (rp[i] > bcol) break; 565 if (rp[i] == bcol) { 566 *v++ = ap[bs2*i+bs*cidx+ridx]; 567 goto finished; 568 } 569 } 570 *v++ = zero; 571 finished:; 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 580 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 581 { 582 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 583 PetscErrorCode ierr; 584 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 585 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 586 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 587 PetscTruth roworiented=a->roworiented; 588 const MatScalar *value = v; 589 MatScalar *ap,*aa = a->a,*bap; 590 591 PetscFunctionBegin; 592 if (roworiented) { 593 stepval = (n-1)*bs; 594 } else { 595 stepval = (m-1)*bs; 596 } 597 for (k=0; k<m; k++) { /* loop over added rows */ 598 row = im[k]; 599 if (row < 0) continue; 600 #if defined(PETSC_USE_DEBUG) 601 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 602 #endif 603 rp = aj + ai[row]; 604 ap = aa + bs2*ai[row]; 605 rmax = imax[row]; 606 nrow = ailen[row]; 607 low = 0; 608 high = nrow; 609 for (l=0; l<n; l++) { /* loop over added columns */ 610 if (in[l] < 0) continue; 611 col = in[l]; 612 #if defined(PETSC_USE_DEBUG) 613 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 614 #endif 615 if (col < row) continue; /* ignore lower triangular block */ 616 if (roworiented) { 617 value = v + k*(stepval+bs)*bs + l*bs; 618 } else { 619 value = v + l*(stepval+bs)*bs + k*bs; 620 } 621 if (col <= lastcol) low = 0; else high = nrow; 622 lastcol = col; 623 while (high-low > 7) { 624 t = (low+high)/2; 625 if (rp[t] > col) high = t; 626 else low = t; 627 } 628 for (i=low; i<high; i++) { 629 if (rp[i] > col) break; 630 if (rp[i] == col) { 631 bap = ap + bs2*i; 632 if (roworiented) { 633 if (is == ADD_VALUES) { 634 for (ii=0; ii<bs; ii++,value+=stepval) { 635 for (jj=ii; jj<bs2; jj+=bs) { 636 bap[jj] += *value++; 637 } 638 } 639 } else { 640 for (ii=0; ii<bs; ii++,value+=stepval) { 641 for (jj=ii; jj<bs2; jj+=bs) { 642 bap[jj] = *value++; 643 } 644 } 645 } 646 } else { 647 if (is == ADD_VALUES) { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=0; jj<bs; jj++) { 650 *bap++ += *value++; 651 } 652 } 653 } else { 654 for (ii=0; ii<bs; ii++,value+=stepval) { 655 for (jj=0; jj<bs; jj++) { 656 *bap++ = *value++; 657 } 658 } 659 } 660 } 661 goto noinsert2; 662 } 663 } 664 if (nonew == 1) goto noinsert2; 665 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 666 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 667 N = nrow++ - 1; high++; 668 /* shift up all the later entries in this row */ 669 for (ii=N; ii>=i; ii--) { 670 rp[ii+1] = rp[ii]; 671 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 672 } 673 if (N >= i) { 674 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 675 } 676 rp[i] = col; 677 bap = ap + bs2*i; 678 if (roworiented) { 679 for (ii=0; ii<bs; ii++,value+=stepval) { 680 for (jj=ii; jj<bs2; jj+=bs) { 681 bap[jj] = *value++; 682 } 683 } 684 } else { 685 for (ii=0; ii<bs; ii++,value+=stepval) { 686 for (jj=0; jj<bs; jj++) { 687 *bap++ = *value++; 688 } 689 } 690 } 691 noinsert2:; 692 low = i; 693 } 694 ailen[row] = nrow; 695 } 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 701 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 702 { 703 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 704 PetscErrorCode ierr; 705 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 706 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 707 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 708 MatScalar *aa = a->a,*ap; 709 710 PetscFunctionBegin; 711 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 712 713 if (m) rmax = ailen[0]; 714 for (i=1; i<mbs; i++) { 715 /* move each row back by the amount of empty slots (fshift) before it*/ 716 fshift += imax[i-1] - ailen[i-1]; 717 rmax = PetscMax(rmax,ailen[i]); 718 if (fshift) { 719 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 720 N = ailen[i]; 721 for (j=0; j<N; j++) { 722 ip[j-fshift] = ip[j]; 723 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 724 } 725 } 726 ai[i] = ai[i-1] + ailen[i-1]; 727 } 728 if (mbs) { 729 fshift += imax[mbs-1] - ailen[mbs-1]; 730 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 731 } 732 /* reset ilen and imax for each row */ 733 for (i=0; i<mbs; i++) { 734 ailen[i] = imax[i] = ai[i+1] - ai[i]; 735 } 736 a->nz = ai[mbs]; 737 738 /* diagonals may have moved, reset it */ 739 if (a->diag) { 740 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 741 } 742 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 743 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 744 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 745 a->reallocs = 0; 746 A->info.nz_unneeded = (PetscReal)fshift*bs2; 747 PetscFunctionReturn(0); 748 } 749 750 /* 751 This function returns an array of flags which indicate the locations of contiguous 752 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 753 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 754 Assume: sizes should be long enough to hold all the values. 755 */ 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 758 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 759 { 760 PetscInt i,j,k,row; 761 PetscTruth flg; 762 763 PetscFunctionBegin; 764 for (i=0,j=0; i<n; j++) { 765 row = idx[i]; 766 if (row%bs!=0) { /* Not the begining of a block */ 767 sizes[j] = 1; 768 i++; 769 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 770 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 771 i++; 772 } else { /* Begining of the block, so check if the complete block exists */ 773 flg = PETSC_TRUE; 774 for (k=1; k<bs; k++) { 775 if (row+k != idx[i+k]) { /* break in the block */ 776 flg = PETSC_FALSE; 777 break; 778 } 779 } 780 if (flg) { /* No break in the bs */ 781 sizes[j] = bs; 782 i+= bs; 783 } else { 784 sizes[j] = 1; 785 i++; 786 } 787 } 788 } 789 *bs_max = j; 790 PetscFunctionReturn(0); 791 } 792 793 794 /* Only add/insert a(i,j) with i<=j (blocks). 795 Any a(i,j) with i>j input by user is ingored. 796 */ 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 800 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 801 { 802 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 803 PetscErrorCode ierr; 804 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 805 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 806 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 807 PetscInt ridx,cidx,bs2=a->bs2; 808 MatScalar *ap,value,*aa=a->a,*bap; 809 810 PetscFunctionBegin; 811 for (k=0; k<m; k++) { /* loop over added rows */ 812 row = im[k]; /* row number */ 813 brow = row/bs; /* block row number */ 814 if (row < 0) continue; 815 #if defined(PETSC_USE_DEBUG) 816 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 817 #endif 818 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 819 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 820 rmax = imax[brow]; /* maximum space allocated for this row */ 821 nrow = ailen[brow]; /* actual length of this row */ 822 low = 0; 823 824 for (l=0; l<n; l++) { /* loop over added columns */ 825 if (in[l] < 0) continue; 826 #if defined(PETSC_USE_DEBUG) 827 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 828 #endif 829 col = in[l]; 830 bcol = col/bs; /* block col number */ 831 832 if (brow > bcol) { 833 if (a->ignore_ltriangular){ 834 continue; /* ignore lower triangular values */ 835 } else { 836 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 837 } 838 } 839 840 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 841 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 842 /* element value a(k,l) */ 843 if (roworiented) { 844 value = v[l + k*n]; 845 } else { 846 value = v[k + l*m]; 847 } 848 849 /* move pointer bap to a(k,l) quickly and add/insert value */ 850 if (col <= lastcol) low = 0; high = nrow; 851 lastcol = col; 852 while (high-low > 7) { 853 t = (low+high)/2; 854 if (rp[t] > bcol) high = t; 855 else low = t; 856 } 857 for (i=low; i<high; i++) { 858 if (rp[i] > bcol) break; 859 if (rp[i] == bcol) { 860 bap = ap + bs2*i + bs*cidx + ridx; 861 if (is == ADD_VALUES) *bap += value; 862 else *bap = value; 863 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 864 if (brow == bcol && ridx < cidx){ 865 bap = ap + bs2*i + bs*ridx + cidx; 866 if (is == ADD_VALUES) *bap += value; 867 else *bap = value; 868 } 869 goto noinsert1; 870 } 871 } 872 873 if (nonew == 1) goto noinsert1; 874 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 875 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 876 877 N = nrow++ - 1; high++; 878 /* shift up all the later entries in this row */ 879 for (ii=N; ii>=i; ii--) { 880 rp[ii+1] = rp[ii]; 881 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 882 } 883 if (N>=i) { 884 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 885 } 886 rp[i] = bcol; 887 ap[bs2*i + bs*cidx + ridx] = value; 888 noinsert1:; 889 low = i; 890 } 891 } /* end of loop over added columns */ 892 ailen[brow] = nrow; 893 } /* end of loop over added rows */ 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 899 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 900 { 901 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 902 Mat outA; 903 PetscErrorCode ierr; 904 PetscTruth row_identity; 905 906 PetscFunctionBegin; 907 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 908 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 909 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 910 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 911 912 outA = inA; 913 inA->factor = FACTOR_CHOLESKY; 914 915 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 916 /* 917 Blocksize < 8 have a special faster factorization/solver 918 for ICC(0) factorization with natural ordering 919 */ 920 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 921 case 1: 922 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 923 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 924 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 925 inA->ops->solves = MatSolves_SeqSBAIJ_1; 926 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 927 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 928 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 929 break; 930 case 2: 931 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 932 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 933 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 934 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 935 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 936 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 937 break; 938 case 3: 939 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 940 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 941 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 942 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 943 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 944 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 945 break; 946 case 4: 947 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 948 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 949 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 950 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 951 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 952 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 953 break; 954 case 5: 955 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 956 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 957 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 958 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 959 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 960 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 961 break; 962 case 6: 963 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 964 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 965 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 966 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 967 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 968 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 969 break; 970 case 7: 971 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 972 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 973 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 974 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 975 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 976 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 977 break; 978 default: 979 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 980 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 981 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 982 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 983 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 984 break; 985 } 986 987 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 988 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 989 a->row = row; 990 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 991 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 992 a->col = row; 993 994 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 995 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 996 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 997 998 if (!a->solve_work) { 999 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1000 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1001 } 1002 1003 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 EXTERN_C_BEGIN 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1010 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1011 { 1012 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1013 PetscInt i,nz,n; 1014 1015 PetscFunctionBegin; 1016 nz = baij->maxnz; 1017 n = mat->cmap.n; 1018 for (i=0; i<nz; i++) { 1019 baij->j[i] = indices[i]; 1020 } 1021 baij->nz = nz; 1022 for (i=0; i<n; i++) { 1023 baij->ilen[i] = baij->imax[i]; 1024 } 1025 PetscFunctionReturn(0); 1026 } 1027 EXTERN_C_END 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1031 /*@ 1032 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1033 in the matrix. 1034 1035 Input Parameters: 1036 + mat - the SeqSBAIJ matrix 1037 - indices - the column indices 1038 1039 Level: advanced 1040 1041 Notes: 1042 This can be called if you have precomputed the nonzero structure of the 1043 matrix and want to provide it to the matrix object to improve the performance 1044 of the MatSetValues() operation. 1045 1046 You MUST have set the correct numbers of nonzeros per row in the call to 1047 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1048 1049 MUST be called before any calls to MatSetValues() 1050 1051 .seealso: MatCreateSeqSBAIJ 1052 @*/ 1053 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1054 { 1055 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1056 1057 PetscFunctionBegin; 1058 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1059 PetscValidPointer(indices,2); 1060 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1061 if (f) { 1062 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1063 } else { 1064 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1071 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1072 { 1073 PetscErrorCode ierr; 1074 1075 PetscFunctionBegin; 1076 /* If the two matrices have the same copy implementation, use fast copy. */ 1077 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1078 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1079 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1080 1081 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1082 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1083 } 1084 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1085 } else { 1086 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1087 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1088 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1089 } 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1095 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1096 { 1097 PetscErrorCode ierr; 1098 1099 PetscFunctionBegin; 1100 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1106 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1107 { 1108 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1109 PetscFunctionBegin; 1110 *array = a->a; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1116 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1117 { 1118 PetscFunctionBegin; 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #include "petscblaslapack.h" 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1125 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1126 { 1127 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1128 PetscErrorCode ierr; 1129 PetscInt i,bs=Y->rmap.bs,bs2,j; 1130 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1131 1132 PetscFunctionBegin; 1133 if (str == SAME_NONZERO_PATTERN) { 1134 PetscScalar alpha = a; 1135 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1136 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1137 if (y->xtoy && y->XtoY != X) { 1138 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1139 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1140 } 1141 if (!y->xtoy) { /* get xtoy */ 1142 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1143 y->XtoY = X; 1144 } 1145 bs2 = bs*bs; 1146 for (i=0; i<x->nz; i++) { 1147 j = 0; 1148 while (j < bs2){ 1149 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1150 j++; 1151 } 1152 } 1153 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1154 } else { 1155 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1156 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1157 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1158 } 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1164 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1165 { 1166 PetscFunctionBegin; 1167 *flg = PETSC_TRUE; 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1173 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1174 { 1175 PetscFunctionBegin; 1176 *flg = PETSC_TRUE; 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1182 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1183 { 1184 PetscFunctionBegin; 1185 *flg = PETSC_FALSE; 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1191 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1192 { 1193 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1194 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1195 PetscScalar *aa = a->a; 1196 1197 PetscFunctionBegin; 1198 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1204 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1205 { 1206 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1207 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1208 PetscScalar *aa = a->a; 1209 1210 PetscFunctionBegin; 1211 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1212 PetscFunctionReturn(0); 1213 } 1214 1215 /* -------------------------------------------------------------------*/ 1216 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1217 MatGetRow_SeqSBAIJ, 1218 MatRestoreRow_SeqSBAIJ, 1219 MatMult_SeqSBAIJ_N, 1220 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1221 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1222 MatMultAdd_SeqSBAIJ_N, 1223 MatSolve_SeqSBAIJ_N, 1224 0, 1225 0, 1226 /*10*/ 0, 1227 0, 1228 MatCholeskyFactor_SeqSBAIJ, 1229 MatRelax_SeqSBAIJ, 1230 MatTranspose_SeqSBAIJ, 1231 /*15*/ MatGetInfo_SeqSBAIJ, 1232 MatEqual_SeqSBAIJ, 1233 MatGetDiagonal_SeqSBAIJ, 1234 MatDiagonalScale_SeqSBAIJ, 1235 MatNorm_SeqSBAIJ, 1236 /*20*/ 0, 1237 MatAssemblyEnd_SeqSBAIJ, 1238 0, 1239 MatSetOption_SeqSBAIJ, 1240 MatZeroEntries_SeqSBAIJ, 1241 /*25*/ 0, 1242 0, 1243 0, 1244 MatCholeskyFactorSymbolic_SeqSBAIJ, 1245 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1246 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1247 0, 1248 MatICCFactorSymbolic_SeqSBAIJ, 1249 MatGetArray_SeqSBAIJ, 1250 MatRestoreArray_SeqSBAIJ, 1251 /*35*/ MatDuplicate_SeqSBAIJ, 1252 MatForwardSolve_SeqSBAIJ_N, 1253 MatBackwardSolve_SeqSBAIJ_N, 1254 0, 1255 MatICCFactor_SeqSBAIJ, 1256 /*40*/ MatAXPY_SeqSBAIJ, 1257 MatGetSubMatrices_SeqSBAIJ, 1258 MatIncreaseOverlap_SeqSBAIJ, 1259 MatGetValues_SeqSBAIJ, 1260 MatCopy_SeqSBAIJ, 1261 /*45*/ 0, 1262 MatScale_SeqSBAIJ, 1263 0, 1264 0, 1265 0, 1266 /*50*/ 0, 1267 MatGetRowIJ_SeqSBAIJ, 1268 MatRestoreRowIJ_SeqSBAIJ, 1269 0, 1270 0, 1271 /*55*/ 0, 1272 0, 1273 0, 1274 0, 1275 MatSetValuesBlocked_SeqSBAIJ, 1276 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1277 0, 1278 0, 1279 0, 1280 0, 1281 /*65*/ 0, 1282 0, 1283 0, 1284 0, 1285 0, 1286 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1287 0, 1288 0, 1289 0, 1290 0, 1291 /*75*/ 0, 1292 0, 1293 0, 1294 0, 1295 0, 1296 /*80*/ 0, 1297 0, 1298 0, 1299 #if !defined(PETSC_USE_COMPLEX) 1300 MatGetInertia_SeqSBAIJ, 1301 #else 1302 0, 1303 #endif 1304 MatLoad_SeqSBAIJ, 1305 /*85*/ MatIsSymmetric_SeqSBAIJ, 1306 MatIsHermitian_SeqSBAIJ, 1307 MatIsStructurallySymmetric_SeqSBAIJ, 1308 0, 1309 0, 1310 /*90*/ 0, 1311 0, 1312 0, 1313 0, 1314 0, 1315 /*95*/ 0, 1316 0, 1317 0, 1318 0, 1319 0, 1320 /*100*/0, 1321 0, 1322 0, 1323 0, 1324 0, 1325 /*105*/0, 1326 MatRealPart_SeqSBAIJ, 1327 MatImaginaryPart_SeqSBAIJ, 1328 MatGetRowUpperTriangular_SeqSBAIJ, 1329 MatRestoreRowUpperTriangular_SeqSBAIJ 1330 }; 1331 1332 EXTERN_C_BEGIN 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1335 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1336 { 1337 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1338 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 if (aij->nonew != 1) { 1343 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1344 } 1345 1346 /* allocate space for values if not already there */ 1347 if (!aij->saved_values) { 1348 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1349 } 1350 1351 /* copy values over */ 1352 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 EXTERN_C_END 1356 1357 EXTERN_C_BEGIN 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1360 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1361 { 1362 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1363 PetscErrorCode ierr; 1364 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1365 1366 PetscFunctionBegin; 1367 if (aij->nonew != 1) { 1368 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1369 } 1370 if (!aij->saved_values) { 1371 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1372 } 1373 1374 /* copy values over */ 1375 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 EXTERN_C_END 1379 1380 EXTERN_C_BEGIN 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1383 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1384 { 1385 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1386 PetscErrorCode ierr; 1387 PetscInt i,mbs,bs2; 1388 PetscTruth skipallocation = PETSC_FALSE,flg; 1389 1390 PetscFunctionBegin; 1391 B->preallocated = PETSC_TRUE; 1392 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1393 B->rmap.bs = B->cmap.bs = bs; 1394 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1395 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1396 1397 mbs = B->rmap.N/bs; 1398 bs2 = bs*bs; 1399 1400 if (mbs*bs != B->rmap.N) { 1401 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1402 } 1403 1404 if (nz == MAT_SKIP_ALLOCATION) { 1405 skipallocation = PETSC_TRUE; 1406 nz = 0; 1407 } 1408 1409 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1410 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1411 if (nnz) { 1412 for (i=0; i<mbs; i++) { 1413 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1414 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1415 } 1416 } 1417 1418 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1419 if (!flg) { 1420 switch (bs) { 1421 case 1: 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1423 B->ops->solve = MatSolve_SeqSBAIJ_1; 1424 B->ops->solves = MatSolves_SeqSBAIJ_1; 1425 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1426 B->ops->mult = MatMult_SeqSBAIJ_1; 1427 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1428 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1429 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1430 break; 1431 case 2: 1432 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1433 B->ops->solve = MatSolve_SeqSBAIJ_2; 1434 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1435 B->ops->mult = MatMult_SeqSBAIJ_2; 1436 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1437 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1438 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1439 break; 1440 case 3: 1441 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1442 B->ops->solve = MatSolve_SeqSBAIJ_3; 1443 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1444 B->ops->mult = MatMult_SeqSBAIJ_3; 1445 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1446 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1447 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1448 break; 1449 case 4: 1450 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1451 B->ops->solve = MatSolve_SeqSBAIJ_4; 1452 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1453 B->ops->mult = MatMult_SeqSBAIJ_4; 1454 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1455 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1456 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1457 break; 1458 case 5: 1459 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1460 B->ops->solve = MatSolve_SeqSBAIJ_5; 1461 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1462 B->ops->mult = MatMult_SeqSBAIJ_5; 1463 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1464 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1465 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1466 break; 1467 case 6: 1468 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1469 B->ops->solve = MatSolve_SeqSBAIJ_6; 1470 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1471 B->ops->mult = MatMult_SeqSBAIJ_6; 1472 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1473 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1474 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1475 break; 1476 case 7: 1477 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1478 B->ops->solve = MatSolve_SeqSBAIJ_7; 1479 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1480 B->ops->mult = MatMult_SeqSBAIJ_7; 1481 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1482 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1483 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1484 break; 1485 } 1486 } 1487 1488 b->mbs = mbs; 1489 b->nbs = mbs; 1490 if (!skipallocation) { 1491 /* b->ilen will count nonzeros in each block row so far. */ 1492 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1493 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1494 if (!nnz) { 1495 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1496 else if (nz <= 0) nz = 1; 1497 for (i=0; i<mbs; i++) { 1498 b->imax[i] = nz; 1499 } 1500 nz = nz*mbs; /* total nz */ 1501 } else { 1502 nz = 0; 1503 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1504 } 1505 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1506 1507 /* allocate the matrix space */ 1508 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1509 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1510 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1511 b->singlemalloc = PETSC_TRUE; 1512 1513 /* pointer to beginning of each row */ 1514 b->i[0] = 0; 1515 for (i=1; i<mbs+1; i++) { 1516 b->i[i] = b->i[i-1] + b->imax[i-1]; 1517 } 1518 b->free_a = PETSC_TRUE; 1519 b->free_ij = PETSC_TRUE; 1520 } else { 1521 b->free_a = PETSC_FALSE; 1522 b->free_ij = PETSC_FALSE; 1523 } 1524 1525 B->rmap.bs = bs; 1526 b->bs2 = bs2; 1527 b->nz = 0; 1528 b->maxnz = nz*bs2; 1529 1530 b->inew = 0; 1531 b->jnew = 0; 1532 b->anew = 0; 1533 b->a2anew = 0; 1534 b->permute = PETSC_FALSE; 1535 PetscFunctionReturn(0); 1536 } 1537 EXTERN_C_END 1538 1539 EXTERN_C_BEGIN 1540 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1541 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1542 EXTERN_C_END 1543 1544 /*MC 1545 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1546 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1547 1548 Options Database Keys: 1549 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1550 1551 Level: beginner 1552 1553 .seealso: MatCreateSeqSBAIJ 1554 M*/ 1555 1556 EXTERN_C_BEGIN 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1559 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1560 { 1561 Mat_SeqSBAIJ *b; 1562 PetscErrorCode ierr; 1563 PetscMPIInt size; 1564 PetscTruth flg; 1565 1566 PetscFunctionBegin; 1567 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1568 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1569 1570 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1571 B->data = (void*)b; 1572 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1573 B->ops->destroy = MatDestroy_SeqSBAIJ; 1574 B->ops->view = MatView_SeqSBAIJ; 1575 B->factor = 0; 1576 B->mapping = 0; 1577 b->row = 0; 1578 b->icol = 0; 1579 b->reallocs = 0; 1580 b->saved_values = 0; 1581 1582 1583 b->sorted = PETSC_FALSE; 1584 b->roworiented = PETSC_TRUE; 1585 b->nonew = 0; 1586 b->diag = 0; 1587 b->solve_work = 0; 1588 b->mult_work = 0; 1589 B->spptr = 0; 1590 b->keepzeroedrows = PETSC_FALSE; 1591 b->xtoy = 0; 1592 b->XtoY = 0; 1593 1594 b->inew = 0; 1595 b->jnew = 0; 1596 b->anew = 0; 1597 b->a2anew = 0; 1598 b->permute = PETSC_FALSE; 1599 1600 b->ignore_ltriangular = PETSC_FALSE; 1601 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1602 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1603 1604 b->getrow_utriangular = PETSC_FALSE; 1605 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1606 if (flg) b->getrow_utriangular = PETSC_TRUE; 1607 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1609 "MatStoreValues_SeqSBAIJ", 1610 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1612 "MatRetrieveValues_SeqSBAIJ", 1613 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1615 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1616 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1618 "MatConvert_SeqSBAIJ_SeqAIJ", 1619 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1621 "MatConvert_SeqSBAIJ_SeqBAIJ", 1622 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1624 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1625 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1626 1627 B->symmetric = PETSC_TRUE; 1628 B->structurally_symmetric = PETSC_TRUE; 1629 B->symmetric_set = PETSC_TRUE; 1630 B->structurally_symmetric_set = PETSC_TRUE; 1631 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 EXTERN_C_END 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1638 /*@C 1639 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1640 compressed row) format. For good matrix assembly performance the 1641 user should preallocate the matrix storage by setting the parameter nz 1642 (or the array nnz). By setting these parameters accurately, performance 1643 during matrix assembly can be increased by more than a factor of 50. 1644 1645 Collective on Mat 1646 1647 Input Parameters: 1648 + A - the symmetric matrix 1649 . bs - size of block 1650 . nz - number of block nonzeros per block row (same for all rows) 1651 - nnz - array containing the number of block nonzeros in the upper triangular plus 1652 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1653 1654 Options Database Keys: 1655 . -mat_no_unroll - uses code that does not unroll the loops in the 1656 block calculations (much slower) 1657 . -mat_block_size - size of the blocks to use 1658 1659 Level: intermediate 1660 1661 Notes: 1662 Specify the preallocated storage with either nz or nnz (not both). 1663 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1664 allocation. For additional details, see the users manual chapter on 1665 matrices. 1666 1667 If the nnz parameter is given then the nz parameter is ignored 1668 1669 1670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1671 @*/ 1672 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1673 { 1674 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1675 1676 PetscFunctionBegin; 1677 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1678 if (f) { 1679 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatCreateSeqSBAIJ" 1686 /*@C 1687 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1688 compressed row) format. For good matrix assembly performance the 1689 user should preallocate the matrix storage by setting the parameter nz 1690 (or the array nnz). By setting these parameters accurately, performance 1691 during matrix assembly can be increased by more than a factor of 50. 1692 1693 Collective on MPI_Comm 1694 1695 Input Parameters: 1696 + comm - MPI communicator, set to PETSC_COMM_SELF 1697 . bs - size of block 1698 . m - number of rows, or number of columns 1699 . nz - number of block nonzeros per block row (same for all rows) 1700 - nnz - array containing the number of block nonzeros in the upper triangular plus 1701 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1702 1703 Output Parameter: 1704 . A - the symmetric matrix 1705 1706 Options Database Keys: 1707 . -mat_no_unroll - uses code that does not unroll the loops in the 1708 block calculations (much slower) 1709 . -mat_block_size - size of the blocks to use 1710 1711 Level: intermediate 1712 1713 Notes: 1714 1715 Specify the preallocated storage with either nz or nnz (not both). 1716 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1717 allocation. For additional details, see the users manual chapter on 1718 matrices. 1719 1720 If the nnz parameter is given then the nz parameter is ignored 1721 1722 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1723 @*/ 1724 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1725 { 1726 PetscErrorCode ierr; 1727 1728 PetscFunctionBegin; 1729 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1730 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1731 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1732 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1738 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1739 { 1740 Mat C; 1741 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1742 PetscErrorCode ierr; 1743 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1744 1745 PetscFunctionBegin; 1746 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1747 1748 *B = 0; 1749 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1750 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1751 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1752 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1753 c = (Mat_SeqSBAIJ*)C->data; 1754 1755 C->preallocated = PETSC_TRUE; 1756 C->factor = A->factor; 1757 c->row = 0; 1758 c->icol = 0; 1759 c->saved_values = 0; 1760 c->keepzeroedrows = a->keepzeroedrows; 1761 C->assembled = PETSC_TRUE; 1762 1763 ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1764 ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1765 c->bs2 = a->bs2; 1766 c->mbs = a->mbs; 1767 c->nbs = a->nbs; 1768 1769 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1770 for (i=0; i<mbs; i++) { 1771 c->imax[i] = a->imax[i]; 1772 c->ilen[i] = a->ilen[i]; 1773 } 1774 1775 /* allocate the matrix space */ 1776 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1777 c->singlemalloc = PETSC_TRUE; 1778 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1779 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1780 if (mbs > 0) { 1781 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1782 if (cpvalues == MAT_COPY_VALUES) { 1783 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1784 } else { 1785 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1786 } 1787 } 1788 1789 c->sorted = a->sorted; 1790 c->roworiented = a->roworiented; 1791 c->nonew = a->nonew; 1792 1793 if (a->diag) { 1794 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1795 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1796 for (i=0; i<mbs; i++) { 1797 c->diag[i] = a->diag[i]; 1798 } 1799 } else c->diag = 0; 1800 c->nz = a->nz; 1801 c->maxnz = a->maxnz; 1802 c->solve_work = 0; 1803 c->mult_work = 0; 1804 c->free_a = PETSC_TRUE; 1805 c->free_ij = PETSC_TRUE; 1806 *B = C; 1807 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1813 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1814 { 1815 Mat_SeqSBAIJ *a; 1816 Mat B; 1817 PetscErrorCode ierr; 1818 int fd; 1819 PetscMPIInt size; 1820 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1821 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1822 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1823 PetscInt *masked,nmask,tmp,bs2,ishift; 1824 PetscScalar *aa; 1825 MPI_Comm comm = ((PetscObject)viewer)->comm; 1826 1827 PetscFunctionBegin; 1828 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1829 bs2 = bs*bs; 1830 1831 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1832 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1833 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1834 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1835 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1836 M = header[1]; N = header[2]; nz = header[3]; 1837 1838 if (header[3] < 0) { 1839 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1840 } 1841 1842 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1843 1844 /* 1845 This code adds extra rows to make sure the number of rows is 1846 divisible by the blocksize 1847 */ 1848 mbs = M/bs; 1849 extra_rows = bs - M + bs*(mbs); 1850 if (extra_rows == bs) extra_rows = 0; 1851 else mbs++; 1852 if (extra_rows) { 1853 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1854 } 1855 1856 /* read in row lengths */ 1857 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1858 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1859 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1860 1861 /* read in column indices */ 1862 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1863 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1864 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1865 1866 /* loop over row lengths determining block row lengths */ 1867 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1868 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1869 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1870 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1871 masked = mask + mbs; 1872 rowcount = 0; nzcount = 0; 1873 for (i=0; i<mbs; i++) { 1874 nmask = 0; 1875 for (j=0; j<bs; j++) { 1876 kmax = rowlengths[rowcount]; 1877 for (k=0; k<kmax; k++) { 1878 tmp = jj[nzcount++]/bs; /* block col. index */ 1879 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1880 } 1881 rowcount++; 1882 } 1883 s_browlengths[i] += nmask; 1884 1885 /* zero out the mask elements we set */ 1886 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1887 } 1888 1889 /* create our matrix */ 1890 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1891 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1892 ierr = MatSetType(B,type);CHKERRQ(ierr); 1893 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1894 a = (Mat_SeqSBAIJ*)B->data; 1895 1896 /* set matrix "i" values */ 1897 a->i[0] = 0; 1898 for (i=1; i<= mbs; i++) { 1899 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1900 a->ilen[i-1] = s_browlengths[i-1]; 1901 } 1902 a->nz = a->i[mbs]; 1903 1904 /* read in nonzero values */ 1905 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1906 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1907 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1908 1909 /* set "a" and "j" values into matrix */ 1910 nzcount = 0; jcount = 0; 1911 for (i=0; i<mbs; i++) { 1912 nzcountb = nzcount; 1913 nmask = 0; 1914 for (j=0; j<bs; j++) { 1915 kmax = rowlengths[i*bs+j]; 1916 for (k=0; k<kmax; k++) { 1917 tmp = jj[nzcount++]/bs; /* block col. index */ 1918 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1919 } 1920 } 1921 /* sort the masked values */ 1922 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1923 1924 /* set "j" values into matrix */ 1925 maskcount = 1; 1926 for (j=0; j<nmask; j++) { 1927 a->j[jcount++] = masked[j]; 1928 mask[masked[j]] = maskcount++; 1929 } 1930 1931 /* set "a" values into matrix */ 1932 ishift = bs2*a->i[i]; 1933 for (j=0; j<bs; j++) { 1934 kmax = rowlengths[i*bs+j]; 1935 for (k=0; k<kmax; k++) { 1936 tmp = jj[nzcountb]/bs ; /* block col. index */ 1937 if (tmp >= i){ 1938 block = mask[tmp] - 1; 1939 point = jj[nzcountb] - bs*tmp; 1940 idx = ishift + bs2*block + j + bs*point; 1941 a->a[idx] = aa[nzcountb]; 1942 } 1943 nzcountb++; 1944 } 1945 } 1946 /* zero out the mask elements we set */ 1947 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1948 } 1949 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1950 1951 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1952 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1953 ierr = PetscFree(aa);CHKERRQ(ierr); 1954 ierr = PetscFree(jj);CHKERRQ(ierr); 1955 ierr = PetscFree(mask);CHKERRQ(ierr); 1956 1957 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1959 ierr = MatView_Private(B);CHKERRQ(ierr); 1960 *A = B; 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1966 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1967 { 1968 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1969 MatScalar *aa=a->a,*v,*v1; 1970 PetscScalar *x,*b,*t,sum,d; 1971 PetscErrorCode ierr; 1972 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1973 PetscInt nz,nz1,*vj,*vj1,i; 1974 1975 PetscFunctionBegin; 1976 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1977 1978 its = its*lits; 1979 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1980 1981 if (bs > 1) 1982 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1983 1984 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1985 if (xx != bb) { 1986 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1987 } else { 1988 b = x; 1989 } 1990 1991 if (!a->relax_work) { 1992 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 1993 } 1994 t = a->relax_work; 1995 1996 if (flag & SOR_ZERO_INITIAL_GUESS) { 1997 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1998 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 1999 2000 for (i=0; i<m; i++){ 2001 d = *(aa + ai[i]); /* diag[i] */ 2002 v = aa + ai[i] + 1; 2003 vj = aj + ai[i] + 1; 2004 nz = ai[i+1] - ai[i] - 1; 2005 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2006 x[i] = omega*t[i]/d; 2007 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2008 } 2009 } 2010 2011 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2012 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2013 t = b; 2014 } 2015 2016 for (i=m-1; i>=0; i--){ 2017 d = *(aa + ai[i]); 2018 v = aa + ai[i] + 1; 2019 vj = aj + ai[i] + 1; 2020 nz = ai[i+1] - ai[i] - 1; 2021 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2022 sum = t[i]; 2023 while (nz--) sum -= x[*vj++]*(*v++); 2024 x[i] = (1-omega)*x[i] + omega*sum/d; 2025 } 2026 t = a->relax_work; 2027 } 2028 its--; 2029 } 2030 2031 while (its--) { 2032 /* 2033 forward sweep: 2034 for i=0,...,m-1: 2035 sum[i] = (b[i] - U(i,:)x )/d[i]; 2036 x[i] = (1-omega)x[i] + omega*sum[i]; 2037 b = b - x[i]*U^T(i,:); 2038 2039 */ 2040 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2041 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2042 2043 for (i=0; i<m; i++){ 2044 d = *(aa + ai[i]); /* diag[i] */ 2045 v = aa + ai[i] + 1; v1=v; 2046 vj = aj + ai[i] + 1; vj1=vj; 2047 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2048 sum = t[i]; 2049 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2050 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2051 x[i] = (1-omega)*x[i] + omega*sum/d; 2052 while (nz--) t[*vj++] -= x[i]*(*v++); 2053 } 2054 } 2055 2056 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2057 /* 2058 backward sweep: 2059 b = b - x[i]*U^T(i,:), i=0,...,n-2 2060 for i=m-1,...,0: 2061 sum[i] = (b[i] - U(i,:)x )/d[i]; 2062 x[i] = (1-omega)x[i] + omega*sum[i]; 2063 */ 2064 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2065 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2066 2067 for (i=0; i<m-1; i++){ /* update rhs */ 2068 v = aa + ai[i] + 1; 2069 vj = aj + ai[i] + 1; 2070 nz = ai[i+1] - ai[i] - 1; 2071 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2072 while (nz--) t[*vj++] -= x[i]*(*v++); 2073 } 2074 for (i=m-1; i>=0; i--){ 2075 d = *(aa + ai[i]); 2076 v = aa + ai[i] + 1; 2077 vj = aj + ai[i] + 1; 2078 nz = ai[i+1] - ai[i] - 1; 2079 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2080 sum = t[i]; 2081 while (nz--) sum -= x[*vj++]*(*v++); 2082 x[i] = (1-omega)*x[i] + omega*sum/d; 2083 } 2084 } 2085 } 2086 2087 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2088 if (bb != xx) { 2089 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2090 } 2091 PetscFunctionReturn(0); 2092 } 2093 2094 #undef __FUNCT__ 2095 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2096 /*@ 2097 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2098 (upper triangular entries in CSR format) provided by the user. 2099 2100 Collective on MPI_Comm 2101 2102 Input Parameters: 2103 + comm - must be an MPI communicator of size 1 2104 . bs - size of block 2105 . m - number of rows 2106 . n - number of columns 2107 . i - row indices 2108 . j - column indices 2109 - a - matrix values 2110 2111 Output Parameter: 2112 . mat - the matrix 2113 2114 Level: intermediate 2115 2116 Notes: 2117 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2118 once the matrix is destroyed 2119 2120 You cannot set new nonzero locations into this matrix, that will generate an error. 2121 2122 The i and j indices are 0 based 2123 2124 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2125 2126 @*/ 2127 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2128 { 2129 PetscErrorCode ierr; 2130 PetscInt ii; 2131 Mat_SeqSBAIJ *sbaij; 2132 2133 PetscFunctionBegin; 2134 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2135 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2136 2137 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2138 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2139 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2140 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2141 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2142 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2143 2144 sbaij->i = i; 2145 sbaij->j = j; 2146 sbaij->a = a; 2147 sbaij->singlemalloc = PETSC_FALSE; 2148 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2149 sbaij->free_a = PETSC_FALSE; 2150 sbaij->free_ij = PETSC_FALSE; 2151 2152 for (ii=0; ii<m; ii++) { 2153 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2154 #if defined(PETSC_USE_DEBUG) 2155 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2156 #endif 2157 } 2158 #if defined(PETSC_USE_DEBUG) 2159 for (ii=0; ii<sbaij->i[m]; ii++) { 2160 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2161 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2162 } 2163 #endif 2164 2165 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2166 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 2171 2172 2173 2174