1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 case MAT_USE_SINGLE_PRECISION_SOLVES: 214 if (a->bs==4) { 215 PetscTruth sse_enabled_local,sse_enabled_global; 216 int ierr; 217 218 sse_enabled_local = PETSC_FALSE; 219 sse_enabled_global = PETSC_FALSE; 220 221 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 222 #if defined(PETSC_HAVE_SSE) 223 if (sse_enabled_local) { 224 a->single_precision_solves = PETSC_TRUE; 225 A->ops->solve = MatSolve_SeqBAIJ_Update; 226 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 227 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 228 break; 229 } else { 230 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 231 } 232 #else 233 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 234 #endif 235 } 236 break; 237 default: 238 SETERRQ(PETSC_ERR_SUP,"unknown option"); 239 } 240 PetscFunctionReturn(0); 241 } 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatGetRow_SeqBAIJ" 245 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 246 { 247 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 248 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 249 MatScalar *aa,*aa_i; 250 PetscScalar *v_i; 251 252 PetscFunctionBegin; 253 bs = a->bs; 254 ai = a->i; 255 aj = a->j; 256 aa = a->a; 257 bs2 = a->bs2; 258 259 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 260 261 bn = row/bs; /* Block number */ 262 bp = row % bs; /* Block Position */ 263 M = ai[bn+1] - ai[bn]; 264 *nz = bs*M; 265 266 if (v) { 267 *v = 0; 268 if (*nz) { 269 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 270 for (i=0; i<M; i++) { /* for each block in the block row */ 271 v_i = *v + i*bs; 272 aa_i = aa + bs2*(ai[bn] + i); 273 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 274 } 275 } 276 } 277 278 if (idx) { 279 *idx = 0; 280 if (*nz) { 281 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 282 for (i=0; i<M; i++) { /* for each block in the block row */ 283 idx_i = *idx + i*bs; 284 itmp = bs*aj[ai[bn] + i]; 285 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 286 } 287 } 288 } 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 294 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 295 { 296 int ierr; 297 298 PetscFunctionBegin; 299 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 300 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 301 PetscFunctionReturn(0); 302 } 303 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatTranspose_SeqBAIJ" 306 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 307 { 308 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 309 Mat C; 310 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 311 int *rows,*cols,bs2=a->bs2; 312 PetscScalar *array; 313 314 PetscFunctionBegin; 315 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 316 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 317 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 318 319 #if defined(PETSC_USE_MAT_SINGLE) 320 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 321 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 322 #else 323 array = a->a; 324 #endif 325 326 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 327 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 328 ierr = PetscFree(col);CHKERRQ(ierr); 329 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 330 cols = rows + bs; 331 for (i=0; i<mbs; i++) { 332 cols[0] = i*bs; 333 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 334 len = ai[i+1] - ai[i]; 335 for (j=0; j<len; j++) { 336 rows[0] = (*aj++)*bs; 337 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 338 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 339 array += bs2; 340 } 341 } 342 ierr = PetscFree(rows);CHKERRQ(ierr); 343 #if defined(PETSC_USE_MAT_SINGLE) 344 ierr = PetscFree(array); 345 #endif 346 347 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 349 350 if (B) { 351 *B = C; 352 } else { 353 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 360 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 361 { 362 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 363 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 364 PetscScalar *aa; 365 FILE *file; 366 367 PetscFunctionBegin; 368 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 369 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 370 col_lens[0] = MAT_FILE_COOKIE; 371 372 col_lens[1] = A->m; 373 col_lens[2] = A->n; 374 col_lens[3] = a->nz*bs2; 375 376 /* store lengths of each row and write (including header) to file */ 377 count = 0; 378 for (i=0; i<a->mbs; i++) { 379 for (j=0; j<bs; j++) { 380 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 381 } 382 } 383 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 384 ierr = PetscFree(col_lens);CHKERRQ(ierr); 385 386 /* store column indices (zero start index) */ 387 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 388 count = 0; 389 for (i=0; i<a->mbs; i++) { 390 for (j=0; j<bs; j++) { 391 for (k=a->i[i]; k<a->i[i+1]; k++) { 392 for (l=0; l<bs; l++) { 393 jj[count++] = bs*a->j[k] + l; 394 } 395 } 396 } 397 } 398 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 399 ierr = PetscFree(jj);CHKERRQ(ierr); 400 401 /* store nonzero values */ 402 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 403 count = 0; 404 for (i=0; i<a->mbs; i++) { 405 for (j=0; j<bs; j++) { 406 for (k=a->i[i]; k<a->i[i+1]; k++) { 407 for (l=0; l<bs; l++) { 408 aa[count++] = a->a[bs2*k + l*bs + j]; 409 } 410 } 411 } 412 } 413 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 414 ierr = PetscFree(aa);CHKERRQ(ierr); 415 416 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 417 if (file) { 418 fprintf(file,"-matload_block_size %d\n",a->bs); 419 } 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 425 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 428 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 429 PetscViewerFormat format; 430 431 PetscFunctionBegin; 432 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 433 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 434 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 435 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 436 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 437 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 438 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 439 for (i=0; i<a->mbs; i++) { 440 for (j=0; j<bs; j++) { 441 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 442 for (k=a->i[i]; k<a->i[i+1]; k++) { 443 for (l=0; l<bs; l++) { 444 #if defined(PETSC_USE_COMPLEX) 445 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 446 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 447 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 448 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 449 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 450 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 451 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 452 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } 454 #else 455 if (a->a[bs2*k + l*bs + j] != 0.0) { 456 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 457 } 458 #endif 459 } 460 } 461 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 462 } 463 } 464 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 465 } else { 466 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 467 for (i=0; i<a->mbs; i++) { 468 for (j=0; j<bs; j++) { 469 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 470 for (k=a->i[i]; k<a->i[i+1]; k++) { 471 for (l=0; l<bs; l++) { 472 #if defined(PETSC_USE_COMPLEX) 473 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 474 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 475 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 476 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 477 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 478 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 479 } else { 480 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } 482 #else 483 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 484 #endif 485 } 486 } 487 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 491 } 492 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 498 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 499 { 500 Mat A = (Mat) Aa; 501 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 502 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 503 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 504 MatScalar *aa; 505 PetscViewer viewer; 506 507 PetscFunctionBegin; 508 509 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 510 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 511 512 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 513 514 /* loop over matrix elements drawing boxes */ 515 color = PETSC_DRAW_BLUE; 516 for (i=0,row=0; i<mbs; i++,row+=bs) { 517 for (j=a->i[i]; j<a->i[i+1]; j++) { 518 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 519 x_l = a->j[j]*bs; x_r = x_l + 1.0; 520 aa = a->a + j*bs2; 521 for (k=0; k<bs; k++) { 522 for (l=0; l<bs; l++) { 523 if (PetscRealPart(*aa++) >= 0.) continue; 524 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 525 } 526 } 527 } 528 } 529 color = PETSC_DRAW_CYAN; 530 for (i=0,row=0; i<mbs; i++,row+=bs) { 531 for (j=a->i[i]; j<a->i[i+1]; j++) { 532 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 533 x_l = a->j[j]*bs; x_r = x_l + 1.0; 534 aa = a->a + j*bs2; 535 for (k=0; k<bs; k++) { 536 for (l=0; l<bs; l++) { 537 if (PetscRealPart(*aa++) != 0.) continue; 538 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 544 color = PETSC_DRAW_RED; 545 for (i=0,row=0; i<mbs; i++,row+=bs) { 546 for (j=a->i[i]; j<a->i[i+1]; j++) { 547 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 548 x_l = a->j[j]*bs; x_r = x_l + 1.0; 549 aa = a->a + j*bs2; 550 for (k=0; k<bs; k++) { 551 for (l=0; l<bs; l++) { 552 if (PetscRealPart(*aa++) <= 0.) continue; 553 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 554 } 555 } 556 } 557 } 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 563 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 564 { 565 int ierr; 566 PetscReal xl,yl,xr,yr,w,h; 567 PetscDraw draw; 568 PetscTruth isnull; 569 570 PetscFunctionBegin; 571 572 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 573 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 574 575 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 576 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 577 xr += w; yr += h; xl = -w; yl = -h; 578 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 579 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 580 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatView_SeqBAIJ" 586 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 587 { 588 int ierr; 589 PetscTruth issocket,isascii,isbinary,isdraw; 590 591 PetscFunctionBegin; 592 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 594 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 596 if (issocket) { 597 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 598 } else if (isascii){ 599 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 600 } else if (isbinary) { 601 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 602 } else if (isdraw) { 603 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 604 } else { 605 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 606 } 607 PetscFunctionReturn(0); 608 } 609 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatGetValues_SeqBAIJ" 613 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 614 { 615 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 616 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 617 int *ai = a->i,*ailen = a->ilen; 618 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 619 MatScalar *ap,*aa = a->a,zero = 0.0; 620 621 PetscFunctionBegin; 622 for (k=0; k<m; k++) { /* loop over rows */ 623 row = im[k]; brow = row/bs; 624 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 625 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 626 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 627 nrow = ailen[brow]; 628 for (l=0; l<n; l++) { /* loop over columns */ 629 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 630 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 631 col = in[l] ; 632 bcol = col/bs; 633 cidx = col%bs; 634 ridx = row%bs; 635 high = nrow; 636 low = 0; /* assume unsorted */ 637 while (high-low > 5) { 638 t = (low+high)/2; 639 if (rp[t] > bcol) high = t; 640 else low = t; 641 } 642 for (i=low; i<high; i++) { 643 if (rp[i] > bcol) break; 644 if (rp[i] == bcol) { 645 *v++ = ap[bs2*i+bs*cidx+ridx]; 646 goto finished; 647 } 648 } 649 *v++ = zero; 650 finished:; 651 } 652 } 653 PetscFunctionReturn(0); 654 } 655 656 #if defined(PETSC_USE_MAT_SINGLE) 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 659 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 660 { 661 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 662 int ierr,i,N = m*n*b->bs2; 663 MatScalar *vsingle; 664 665 PetscFunctionBegin; 666 if (N > b->setvalueslen) { 667 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 668 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 669 b->setvalueslen = N; 670 } 671 vsingle = b->setvaluescopy; 672 for (i=0; i<N; i++) { 673 vsingle[i] = v[i]; 674 } 675 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 #endif 679 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 683 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 684 { 685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 686 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 687 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 688 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 689 PetscTruth roworiented=a->roworiented; 690 MatScalar *value = v,*ap,*aa = a->a,*bap; 691 692 PetscFunctionBegin; 693 if (roworiented) { 694 stepval = (n-1)*bs; 695 } else { 696 stepval = (m-1)*bs; 697 } 698 for (k=0; k<m; k++) { /* loop over added rows */ 699 row = im[k]; 700 if (row < 0) continue; 701 #if defined(PETSC_USE_BOPT_g) 702 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 703 #endif 704 rp = aj + ai[row]; 705 ap = aa + bs2*ai[row]; 706 rmax = imax[row]; 707 nrow = ailen[row]; 708 low = 0; 709 for (l=0; l<n; l++) { /* loop over added columns */ 710 if (in[l] < 0) continue; 711 #if defined(PETSC_USE_BOPT_g) 712 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 713 #endif 714 col = in[l]; 715 if (roworiented) { 716 value = v + k*(stepval+bs)*bs + l*bs; 717 } else { 718 value = v + l*(stepval+bs)*bs + k*bs; 719 } 720 if (!sorted) low = 0; high = nrow; 721 while (high-low > 7) { 722 t = (low+high)/2; 723 if (rp[t] > col) high = t; 724 else low = t; 725 } 726 for (i=low; i<high; i++) { 727 if (rp[i] > col) break; 728 if (rp[i] == col) { 729 bap = ap + bs2*i; 730 if (roworiented) { 731 if (is == ADD_VALUES) { 732 for (ii=0; ii<bs; ii++,value+=stepval) { 733 for (jj=ii; jj<bs2; jj+=bs) { 734 bap[jj] += *value++; 735 } 736 } 737 } else { 738 for (ii=0; ii<bs; ii++,value+=stepval) { 739 for (jj=ii; jj<bs2; jj+=bs) { 740 bap[jj] = *value++; 741 } 742 } 743 } 744 } else { 745 if (is == ADD_VALUES) { 746 for (ii=0; ii<bs; ii++,value+=stepval) { 747 for (jj=0; jj<bs; jj++) { 748 *bap++ += *value++; 749 } 750 } 751 } else { 752 for (ii=0; ii<bs; ii++,value+=stepval) { 753 for (jj=0; jj<bs; jj++) { 754 *bap++ = *value++; 755 } 756 } 757 } 758 } 759 goto noinsert2; 760 } 761 } 762 if (nonew == 1) goto noinsert2; 763 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 764 if (nrow >= rmax) { 765 /* there is no extra room in row, therefore enlarge */ 766 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 767 MatScalar *new_a; 768 769 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 770 771 /* malloc new storage space */ 772 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 773 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 774 new_j = (int*)(new_a + bs2*new_nz); 775 new_i = new_j + new_nz; 776 777 /* copy over old data into new slots */ 778 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 779 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 780 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 781 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 782 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 783 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 784 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 785 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 786 /* free up old matrix storage */ 787 ierr = PetscFree(a->a);CHKERRQ(ierr); 788 if (!a->singlemalloc) { 789 ierr = PetscFree(a->i);CHKERRQ(ierr); 790 ierr = PetscFree(a->j);CHKERRQ(ierr); 791 } 792 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 793 a->singlemalloc = PETSC_TRUE; 794 795 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 796 rmax = imax[row] = imax[row] + CHUNKSIZE; 797 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 798 a->maxnz += bs2*CHUNKSIZE; 799 a->reallocs++; 800 a->nz++; 801 } 802 N = nrow++ - 1; 803 /* shift up all the later entries in this row */ 804 for (ii=N; ii>=i; ii--) { 805 rp[ii+1] = rp[ii]; 806 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 807 } 808 if (N >= i) { 809 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 810 } 811 rp[i] = col; 812 bap = ap + bs2*i; 813 if (roworiented) { 814 for (ii=0; ii<bs; ii++,value+=stepval) { 815 for (jj=ii; jj<bs2; jj+=bs) { 816 bap[jj] = *value++; 817 } 818 } 819 } else { 820 for (ii=0; ii<bs; ii++,value+=stepval) { 821 for (jj=0; jj<bs; jj++) { 822 *bap++ = *value++; 823 } 824 } 825 } 826 noinsert2:; 827 low = i; 828 } 829 ailen[row] = nrow; 830 } 831 PetscFunctionReturn(0); 832 } 833 834 835 #undef __FUNCT__ 836 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 837 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 838 { 839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 840 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 841 int m = A->m,*ip,N,*ailen = a->ilen; 842 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 843 MatScalar *aa = a->a,*ap; 844 845 PetscFunctionBegin; 846 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 847 848 if (m) rmax = ailen[0]; 849 for (i=1; i<mbs; i++) { 850 /* move each row back by the amount of empty slots (fshift) before it*/ 851 fshift += imax[i-1] - ailen[i-1]; 852 rmax = PetscMax(rmax,ailen[i]); 853 if (fshift) { 854 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 855 N = ailen[i]; 856 for (j=0; j<N; j++) { 857 ip[j-fshift] = ip[j]; 858 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 859 } 860 } 861 ai[i] = ai[i-1] + ailen[i-1]; 862 } 863 if (mbs) { 864 fshift += imax[mbs-1] - ailen[mbs-1]; 865 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 866 } 867 /* reset ilen and imax for each row */ 868 for (i=0; i<mbs; i++) { 869 ailen[i] = imax[i] = ai[i+1] - ai[i]; 870 } 871 a->nz = ai[mbs]; 872 873 /* diagonals may have moved, so kill the diagonal pointers */ 874 if (fshift && a->diag) { 875 ierr = PetscFree(a->diag);CHKERRQ(ierr); 876 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 877 a->diag = 0; 878 } 879 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 880 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 881 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 882 a->reallocs = 0; 883 A->info.nz_unneeded = (PetscReal)fshift*bs2; 884 885 PetscFunctionReturn(0); 886 } 887 888 889 890 /* 891 This function returns an array of flags which indicate the locations of contiguous 892 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 893 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 894 Assume: sizes should be long enough to hold all the values. 895 */ 896 #undef __FUNCT__ 897 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 898 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 899 { 900 int i,j,k,row; 901 PetscTruth flg; 902 903 PetscFunctionBegin; 904 for (i=0,j=0; i<n; j++) { 905 row = idx[i]; 906 if (row%bs!=0) { /* Not the begining of a block */ 907 sizes[j] = 1; 908 i++; 909 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 910 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 911 i++; 912 } else { /* Begining of the block, so check if the complete block exists */ 913 flg = PETSC_TRUE; 914 for (k=1; k<bs; k++) { 915 if (row+k != idx[i+k]) { /* break in the block */ 916 flg = PETSC_FALSE; 917 break; 918 } 919 } 920 if (flg == PETSC_TRUE) { /* No break in the bs */ 921 sizes[j] = bs; 922 i+= bs; 923 } else { 924 sizes[j] = 1; 925 i++; 926 } 927 } 928 } 929 *bs_max = j; 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 935 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 936 { 937 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 938 int ierr,i,j,k,count,is_n,*is_idx,*rows; 939 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 940 PetscScalar zero = 0.0; 941 MatScalar *aa; 942 943 PetscFunctionBegin; 944 /* Make a copy of the IS and sort it */ 945 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 946 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 947 948 /* allocate memory for rows,sizes */ 949 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 950 sizes = rows + is_n; 951 952 /* copy IS values to rows, and sort them */ 953 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 954 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 955 if (baij->keepzeroedrows) { 956 for (i=0; i<is_n; i++) { sizes[i] = 1; } 957 bs_max = is_n; 958 } else { 959 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 960 } 961 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 962 963 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 964 row = rows[j]; 965 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 966 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 967 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 968 if (sizes[i] == bs && !baij->keepzeroedrows) { 969 if (diag) { 970 if (baij->ilen[row/bs] > 0) { 971 baij->ilen[row/bs] = 1; 972 baij->j[baij->i[row/bs]] = row/bs; 973 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 974 } 975 /* Now insert all the diagonal values for this bs */ 976 for (k=0; k<bs; k++) { 977 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 978 } 979 } else { /* (!diag) */ 980 baij->ilen[row/bs] = 0; 981 } /* end (!diag) */ 982 } else { /* (sizes[i] != bs) */ 983 #if defined (PETSC_USE_DEBUG) 984 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 985 #endif 986 for (k=0; k<count; k++) { 987 aa[0] = zero; 988 aa += bs; 989 } 990 if (diag) { 991 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 992 } 993 } 994 } 995 996 ierr = PetscFree(rows);CHKERRQ(ierr); 997 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1003 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1004 { 1005 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1006 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1007 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1008 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1009 int ridx,cidx,bs2=a->bs2,ierr; 1010 PetscTruth roworiented=a->roworiented; 1011 MatScalar *ap,value,*aa=a->a,*bap; 1012 1013 PetscFunctionBegin; 1014 for (k=0; k<m; k++) { /* loop over added rows */ 1015 row = im[k]; brow = row/bs; 1016 if (row < 0) continue; 1017 #if defined(PETSC_USE_BOPT_g) 1018 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1019 #endif 1020 rp = aj + ai[brow]; 1021 ap = aa + bs2*ai[brow]; 1022 rmax = imax[brow]; 1023 nrow = ailen[brow]; 1024 low = 0; 1025 for (l=0; l<n; l++) { /* loop over added columns */ 1026 if (in[l] < 0) continue; 1027 #if defined(PETSC_USE_BOPT_g) 1028 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1029 #endif 1030 col = in[l]; bcol = col/bs; 1031 ridx = row % bs; cidx = col % bs; 1032 if (roworiented) { 1033 value = v[l + k*n]; 1034 } else { 1035 value = v[k + l*m]; 1036 } 1037 if (!sorted) low = 0; high = nrow; 1038 while (high-low > 7) { 1039 t = (low+high)/2; 1040 if (rp[t] > bcol) high = t; 1041 else low = t; 1042 } 1043 for (i=low; i<high; i++) { 1044 if (rp[i] > bcol) break; 1045 if (rp[i] == bcol) { 1046 bap = ap + bs2*i + bs*cidx + ridx; 1047 if (is == ADD_VALUES) *bap += value; 1048 else *bap = value; 1049 goto noinsert1; 1050 } 1051 } 1052 if (nonew == 1) goto noinsert1; 1053 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1054 if (nrow >= rmax) { 1055 /* there is no extra room in row, therefore enlarge */ 1056 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1057 MatScalar *new_a; 1058 1059 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1060 1061 /* Malloc new storage space */ 1062 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1063 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1064 new_j = (int*)(new_a + bs2*new_nz); 1065 new_i = new_j + new_nz; 1066 1067 /* copy over old data into new slots */ 1068 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1069 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1070 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1071 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1072 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1073 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1074 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1075 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1076 /* free up old matrix storage */ 1077 ierr = PetscFree(a->a);CHKERRQ(ierr); 1078 if (!a->singlemalloc) { 1079 ierr = PetscFree(a->i);CHKERRQ(ierr); 1080 ierr = PetscFree(a->j);CHKERRQ(ierr); 1081 } 1082 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1083 a->singlemalloc = PETSC_TRUE; 1084 1085 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1086 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1087 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1088 a->maxnz += bs2*CHUNKSIZE; 1089 a->reallocs++; 1090 a->nz++; 1091 } 1092 N = nrow++ - 1; 1093 /* shift up all the later entries in this row */ 1094 for (ii=N; ii>=i; ii--) { 1095 rp[ii+1] = rp[ii]; 1096 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1097 } 1098 if (N>=i) { 1099 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1100 } 1101 rp[i] = bcol; 1102 ap[bs2*i + bs*cidx + ridx] = value; 1103 noinsert1:; 1104 low = i; 1105 } 1106 ailen[brow] = nrow; 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1114 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1117 Mat outA; 1118 int ierr; 1119 PetscTruth row_identity,col_identity; 1120 1121 PetscFunctionBegin; 1122 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1123 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1124 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1125 if (!row_identity || !col_identity) { 1126 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1127 } 1128 1129 outA = inA; 1130 inA->factor = FACTOR_LU; 1131 1132 if (!a->diag) { 1133 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1134 } 1135 1136 a->row = row; 1137 a->col = col; 1138 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1139 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1140 1141 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1142 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1143 PetscLogObjectParent(inA,a->icol); 1144 1145 /* 1146 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1147 for ILU(0) factorization with natural ordering 1148 */ 1149 if (a->bs < 8) { 1150 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1151 } else { 1152 if (!a->solve_work) { 1153 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1154 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1155 } 1156 } 1157 1158 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1159 1160 PetscFunctionReturn(0); 1161 } 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1164 int MatPrintHelp_SeqBAIJ(Mat A) 1165 { 1166 static PetscTruth called = PETSC_FALSE; 1167 MPI_Comm comm = A->comm; 1168 int ierr; 1169 1170 PetscFunctionBegin; 1171 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1172 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1173 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 EXTERN_C_BEGIN 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1180 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1181 { 1182 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1183 int i,nz,nbs; 1184 1185 PetscFunctionBegin; 1186 nz = baij->maxnz/baij->bs2; 1187 nbs = baij->nbs; 1188 for (i=0; i<nz; i++) { 1189 baij->j[i] = indices[i]; 1190 } 1191 baij->nz = nz; 1192 for (i=0; i<nbs; i++) { 1193 baij->ilen[i] = baij->imax[i]; 1194 } 1195 1196 PetscFunctionReturn(0); 1197 } 1198 EXTERN_C_END 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1202 /*@ 1203 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1204 in the matrix. 1205 1206 Input Parameters: 1207 + mat - the SeqBAIJ matrix 1208 - indices - the column indices 1209 1210 Level: advanced 1211 1212 Notes: 1213 This can be called if you have precomputed the nonzero structure of the 1214 matrix and want to provide it to the matrix object to improve the performance 1215 of the MatSetValues() operation. 1216 1217 You MUST have set the correct numbers of nonzeros per row in the call to 1218 MatCreateSeqBAIJ(). 1219 1220 MUST be called before any calls to MatSetValues(); 1221 1222 @*/ 1223 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1224 { 1225 int ierr,(*f)(Mat,int *); 1226 1227 PetscFunctionBegin; 1228 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1229 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1230 if (f) { 1231 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1232 } else { 1233 SETERRQ(1,"Wrong type of matrix to set column indices"); 1234 } 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1240 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1241 { 1242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1243 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1244 PetscReal atmp; 1245 PetscScalar *x,zero = 0.0; 1246 MatScalar *aa; 1247 int ncols,brow,krow,kcol; 1248 1249 PetscFunctionBegin; 1250 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1251 bs = a->bs; 1252 aa = a->a; 1253 ai = a->i; 1254 aj = a->j; 1255 mbs = a->mbs; 1256 1257 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1258 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1259 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1260 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1261 for (i=0; i<mbs; i++) { 1262 ncols = ai[1] - ai[0]; ai++; 1263 brow = bs*i; 1264 for (j=0; j<ncols; j++){ 1265 /* bcol = bs*(*aj); */ 1266 for (kcol=0; kcol<bs; kcol++){ 1267 for (krow=0; krow<bs; krow++){ 1268 atmp = PetscAbsScalar(*aa); aa++; 1269 row = brow + krow; /* row index */ 1270 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1271 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1272 } 1273 } 1274 aj++; 1275 } 1276 } 1277 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1283 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1284 { 1285 int ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1294 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1295 { 1296 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1297 PetscFunctionBegin; 1298 *array = a->a; 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1304 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1305 { 1306 PetscFunctionBegin; 1307 PetscFunctionReturn(0); 1308 } 1309 1310 /* -------------------------------------------------------------------*/ 1311 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1312 MatGetRow_SeqBAIJ, 1313 MatRestoreRow_SeqBAIJ, 1314 MatMult_SeqBAIJ_N, 1315 MatMultAdd_SeqBAIJ_N, 1316 MatMultTranspose_SeqBAIJ, 1317 MatMultTransposeAdd_SeqBAIJ, 1318 MatSolve_SeqBAIJ_N, 1319 0, 1320 0, 1321 0, 1322 MatLUFactor_SeqBAIJ, 1323 0, 1324 0, 1325 MatTranspose_SeqBAIJ, 1326 MatGetInfo_SeqBAIJ, 1327 MatEqual_SeqBAIJ, 1328 MatGetDiagonal_SeqBAIJ, 1329 MatDiagonalScale_SeqBAIJ, 1330 MatNorm_SeqBAIJ, 1331 0, 1332 MatAssemblyEnd_SeqBAIJ, 1333 0, 1334 MatSetOption_SeqBAIJ, 1335 MatZeroEntries_SeqBAIJ, 1336 MatZeroRows_SeqBAIJ, 1337 MatLUFactorSymbolic_SeqBAIJ, 1338 MatLUFactorNumeric_SeqBAIJ_N, 1339 0, 1340 0, 1341 MatSetUpPreallocation_SeqBAIJ, 1342 MatILUFactorSymbolic_SeqBAIJ, 1343 0, 1344 MatGetArray_SeqBAIJ, 1345 MatRestoreArray_SeqBAIJ, 1346 MatDuplicate_SeqBAIJ, 1347 0, 1348 0, 1349 MatILUFactor_SeqBAIJ, 1350 0, 1351 0, 1352 MatGetSubMatrices_SeqBAIJ, 1353 MatIncreaseOverlap_SeqBAIJ, 1354 MatGetValues_SeqBAIJ, 1355 0, 1356 MatPrintHelp_SeqBAIJ, 1357 MatScale_SeqBAIJ, 1358 0, 1359 0, 1360 0, 1361 MatGetBlockSize_SeqBAIJ, 1362 MatGetRowIJ_SeqBAIJ, 1363 MatRestoreRowIJ_SeqBAIJ, 1364 0, 1365 0, 1366 0, 1367 0, 1368 0, 1369 0, 1370 MatSetValuesBlocked_SeqBAIJ, 1371 MatGetSubMatrix_SeqBAIJ, 1372 MatDestroy_SeqBAIJ, 1373 MatView_SeqBAIJ, 1374 MatGetPetscMaps_Petsc, 1375 0, 1376 0, 1377 0, 1378 0, 1379 0, 1380 0, 1381 MatGetRowMax_SeqBAIJ, 1382 MatConvert_Basic}; 1383 1384 EXTERN_C_BEGIN 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1387 int MatStoreValues_SeqBAIJ(Mat mat) 1388 { 1389 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1390 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1391 int ierr; 1392 1393 PetscFunctionBegin; 1394 if (aij->nonew != 1) { 1395 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1396 } 1397 1398 /* allocate space for values if not already there */ 1399 if (!aij->saved_values) { 1400 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1401 } 1402 1403 /* copy values over */ 1404 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 EXTERN_C_END 1408 1409 EXTERN_C_BEGIN 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1412 int MatRetrieveValues_SeqBAIJ(Mat mat) 1413 { 1414 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1415 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1416 1417 PetscFunctionBegin; 1418 if (aij->nonew != 1) { 1419 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1420 } 1421 if (!aij->saved_values) { 1422 SETERRQ(1,"Must call MatStoreValues(A);first"); 1423 } 1424 1425 /* copy values over */ 1426 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 EXTERN_C_END 1430 1431 EXTERN_C_BEGIN 1432 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1433 EXTERN_C_END 1434 1435 EXTERN_C_BEGIN 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatCreate_SeqBAIJ" 1438 int MatCreate_SeqBAIJ(Mat B) 1439 { 1440 int ierr,size; 1441 Mat_SeqBAIJ *b; 1442 1443 PetscFunctionBegin; 1444 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1445 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1446 1447 B->m = B->M = PetscMax(B->m,B->M); 1448 B->n = B->N = PetscMax(B->n,B->N); 1449 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1450 B->data = (void*)b; 1451 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1452 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1453 B->factor = 0; 1454 B->lupivotthreshold = 1.0; 1455 B->mapping = 0; 1456 b->row = 0; 1457 b->col = 0; 1458 b->icol = 0; 1459 b->reallocs = 0; 1460 b->saved_values = 0; 1461 #if defined(PETSC_USE_MAT_SINGLE) 1462 b->setvalueslen = 0; 1463 b->setvaluescopy = PETSC_NULL; 1464 #endif 1465 b->single_precision_solves = PETSC_FALSE; 1466 1467 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1468 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1469 1470 b->sorted = PETSC_FALSE; 1471 b->roworiented = PETSC_TRUE; 1472 b->nonew = 0; 1473 b->diag = 0; 1474 b->solve_work = 0; 1475 b->mult_work = 0; 1476 b->spptr = 0; 1477 B->info.nz_unneeded = (PetscReal)b->maxnz; 1478 b->keepzeroedrows = PETSC_FALSE; 1479 1480 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1481 "MatStoreValues_SeqBAIJ", 1482 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1483 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1484 "MatRetrieveValues_SeqBAIJ", 1485 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1486 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1487 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1488 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1489 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1490 "MatConvert_SeqBAIJ_SeqAIJ", 1491 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 EXTERN_C_END 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1498 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1499 { 1500 Mat C; 1501 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1502 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1503 1504 PetscFunctionBegin; 1505 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1506 1507 *B = 0; 1508 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1509 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1510 c = (Mat_SeqBAIJ*)C->data; 1511 1512 c->bs = a->bs; 1513 c->bs2 = a->bs2; 1514 c->mbs = a->mbs; 1515 c->nbs = a->nbs; 1516 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1517 1518 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1519 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1520 for (i=0; i<mbs; i++) { 1521 c->imax[i] = a->imax[i]; 1522 c->ilen[i] = a->ilen[i]; 1523 } 1524 1525 /* allocate the matrix space */ 1526 c->singlemalloc = PETSC_TRUE; 1527 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1528 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1529 c->j = (int*)(c->a + nz*bs2); 1530 c->i = c->j + nz; 1531 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1532 if (mbs > 0) { 1533 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1534 if (cpvalues == MAT_COPY_VALUES) { 1535 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1536 } else { 1537 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1538 } 1539 } 1540 1541 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1542 c->sorted = a->sorted; 1543 c->roworiented = a->roworiented; 1544 c->nonew = a->nonew; 1545 1546 if (a->diag) { 1547 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1548 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1549 for (i=0; i<mbs; i++) { 1550 c->diag[i] = a->diag[i]; 1551 } 1552 } else c->diag = 0; 1553 c->nz = a->nz; 1554 c->maxnz = a->maxnz; 1555 c->solve_work = 0; 1556 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1557 c->mult_work = 0; 1558 C->preallocated = PETSC_TRUE; 1559 C->assembled = PETSC_TRUE; 1560 *B = C; 1561 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 1565 EXTERN_C_BEGIN 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatLoad_SeqBAIJ" 1568 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1569 { 1570 Mat_SeqBAIJ *a; 1571 Mat B; 1572 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1573 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1574 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1575 int *masked,nmask,tmp,bs2,ishift; 1576 PetscScalar *aa; 1577 MPI_Comm comm = ((PetscObject)viewer)->comm; 1578 1579 PetscFunctionBegin; 1580 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1581 bs2 = bs*bs; 1582 1583 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1584 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1585 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1586 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1587 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1588 M = header[1]; N = header[2]; nz = header[3]; 1589 1590 if (header[3] < 0) { 1591 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1592 } 1593 1594 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1595 1596 /* 1597 This code adds extra rows to make sure the number of rows is 1598 divisible by the blocksize 1599 */ 1600 mbs = M/bs; 1601 extra_rows = bs - M + bs*(mbs); 1602 if (extra_rows == bs) extra_rows = 0; 1603 else mbs++; 1604 if (extra_rows) { 1605 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1606 } 1607 1608 /* read in row lengths */ 1609 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1610 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1611 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1612 1613 /* read in column indices */ 1614 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1615 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1616 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1617 1618 /* loop over row lengths determining block row lengths */ 1619 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1620 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1621 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1622 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1623 masked = mask + mbs; 1624 rowcount = 0; nzcount = 0; 1625 for (i=0; i<mbs; i++) { 1626 nmask = 0; 1627 for (j=0; j<bs; j++) { 1628 kmax = rowlengths[rowcount]; 1629 for (k=0; k<kmax; k++) { 1630 tmp = jj[nzcount++]/bs; 1631 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1632 } 1633 rowcount++; 1634 } 1635 browlengths[i] += nmask; 1636 /* zero out the mask elements we set */ 1637 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1638 } 1639 1640 /* create our matrix */ 1641 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1642 B = *A; 1643 a = (Mat_SeqBAIJ*)B->data; 1644 1645 /* set matrix "i" values */ 1646 a->i[0] = 0; 1647 for (i=1; i<= mbs; i++) { 1648 a->i[i] = a->i[i-1] + browlengths[i-1]; 1649 a->ilen[i-1] = browlengths[i-1]; 1650 } 1651 a->nz = 0; 1652 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1653 1654 /* read in nonzero values */ 1655 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1656 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1657 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1658 1659 /* set "a" and "j" values into matrix */ 1660 nzcount = 0; jcount = 0; 1661 for (i=0; i<mbs; i++) { 1662 nzcountb = nzcount; 1663 nmask = 0; 1664 for (j=0; j<bs; j++) { 1665 kmax = rowlengths[i*bs+j]; 1666 for (k=0; k<kmax; k++) { 1667 tmp = jj[nzcount++]/bs; 1668 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1669 } 1670 } 1671 /* sort the masked values */ 1672 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1673 1674 /* set "j" values into matrix */ 1675 maskcount = 1; 1676 for (j=0; j<nmask; j++) { 1677 a->j[jcount++] = masked[j]; 1678 mask[masked[j]] = maskcount++; 1679 } 1680 /* set "a" values into matrix */ 1681 ishift = bs2*a->i[i]; 1682 for (j=0; j<bs; j++) { 1683 kmax = rowlengths[i*bs+j]; 1684 for (k=0; k<kmax; k++) { 1685 tmp = jj[nzcountb]/bs ; 1686 block = mask[tmp] - 1; 1687 point = jj[nzcountb] - bs*tmp; 1688 idx = ishift + bs2*block + j + bs*point; 1689 a->a[idx] = (MatScalar)aa[nzcountb++]; 1690 } 1691 } 1692 /* zero out the mask elements we set */ 1693 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1694 } 1695 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1696 1697 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1698 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1699 ierr = PetscFree(aa);CHKERRQ(ierr); 1700 ierr = PetscFree(jj);CHKERRQ(ierr); 1701 ierr = PetscFree(mask);CHKERRQ(ierr); 1702 1703 B->assembled = PETSC_TRUE; 1704 1705 ierr = MatView_Private(B);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 EXTERN_C_END 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatCreateSeqBAIJ" 1712 /*@C 1713 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1714 compressed row) format. For good matrix assembly performance the 1715 user should preallocate the matrix storage by setting the parameter nz 1716 (or the array nnz). By setting these parameters accurately, performance 1717 during matrix assembly can be increased by more than a factor of 50. 1718 1719 Collective on MPI_Comm 1720 1721 Input Parameters: 1722 + comm - MPI communicator, set to PETSC_COMM_SELF 1723 . bs - size of block 1724 . m - number of rows 1725 . n - number of columns 1726 . nz - number of nonzero blocks per block row (same for all rows) 1727 - nnz - array containing the number of nonzero blocks in the various block rows 1728 (possibly different for each block row) or PETSC_NULL 1729 1730 Output Parameter: 1731 . A - the matrix 1732 1733 Options Database Keys: 1734 . -mat_no_unroll - uses code that does not unroll the loops in the 1735 block calculations (much slower) 1736 . -mat_block_size - size of the blocks to use 1737 1738 Level: intermediate 1739 1740 Notes: 1741 A nonzero block is any block that as 1 or more nonzeros in it 1742 1743 The block AIJ format is fully compatible with standard Fortran 77 1744 storage. That is, the stored row and column indices can begin at 1745 either one (as in Fortran) or zero. See the users' manual for details. 1746 1747 Specify the preallocated storage with either nz or nnz (not both). 1748 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1749 allocation. For additional details, see the users manual chapter on 1750 matrices. 1751 1752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1753 @*/ 1754 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1755 { 1756 int ierr; 1757 1758 PetscFunctionBegin; 1759 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1760 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1761 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1767 /*@C 1768 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1769 per row in the matrix. For good matrix assembly performance the 1770 user should preallocate the matrix storage by setting the parameter nz 1771 (or the array nnz). By setting these parameters accurately, performance 1772 during matrix assembly can be increased by more than a factor of 50. 1773 1774 Collective on MPI_Comm 1775 1776 Input Parameters: 1777 + A - the matrix 1778 . bs - size of block 1779 . nz - number of block nonzeros per block row (same for all rows) 1780 - nnz - array containing the number of block nonzeros in the various block rows 1781 (possibly different for each block row) or PETSC_NULL 1782 1783 Options Database Keys: 1784 . -mat_no_unroll - uses code that does not unroll the loops in the 1785 block calculations (much slower) 1786 . -mat_block_size - size of the blocks to use 1787 1788 Level: intermediate 1789 1790 Notes: 1791 The block AIJ format is fully compatible with standard Fortran 77 1792 storage. That is, the stored row and column indices can begin at 1793 either one (as in Fortran) or zero. See the users' manual for details. 1794 1795 Specify the preallocated storage with either nz or nnz (not both). 1796 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1797 allocation. For additional details, see the users manual chapter on 1798 matrices. 1799 1800 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1801 @*/ 1802 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1803 { 1804 Mat_SeqBAIJ *b; 1805 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1806 PetscTruth flg; 1807 1808 PetscFunctionBegin; 1809 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1810 if (!flg) PetscFunctionReturn(0); 1811 1812 B->preallocated = PETSC_TRUE; 1813 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1814 if (nnz && newbs != bs) { 1815 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1816 } 1817 bs = newbs; 1818 1819 mbs = B->m/bs; 1820 nbs = B->n/bs; 1821 bs2 = bs*bs; 1822 1823 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1824 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1825 } 1826 1827 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1828 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1829 if (nnz) { 1830 for (i=0; i<mbs; i++) { 1831 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1832 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1833 } 1834 } 1835 1836 b = (Mat_SeqBAIJ*)B->data; 1837 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1838 B->ops->solve = MatSolve_SeqBAIJ_Update; 1839 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1840 if (!flg) { 1841 switch (bs) { 1842 case 1: 1843 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1844 B->ops->mult = MatMult_SeqBAIJ_1; 1845 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1846 break; 1847 case 2: 1848 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1849 B->ops->mult = MatMult_SeqBAIJ_2; 1850 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1851 break; 1852 case 3: 1853 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1854 B->ops->mult = MatMult_SeqBAIJ_3; 1855 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1856 break; 1857 case 4: 1858 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1859 B->ops->mult = MatMult_SeqBAIJ_4; 1860 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1861 break; 1862 case 5: 1863 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1864 B->ops->mult = MatMult_SeqBAIJ_5; 1865 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1866 break; 1867 case 6: 1868 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1869 B->ops->mult = MatMult_SeqBAIJ_6; 1870 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1871 break; 1872 case 7: 1873 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1874 B->ops->mult = MatMult_SeqBAIJ_7; 1875 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1876 break; 1877 default: 1878 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1879 B->ops->mult = MatMult_SeqBAIJ_N; 1880 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1881 break; 1882 } 1883 } 1884 b->bs = bs; 1885 b->mbs = mbs; 1886 b->nbs = nbs; 1887 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1888 if (!nnz) { 1889 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1890 else if (nz <= 0) nz = 1; 1891 for (i=0; i<mbs; i++) b->imax[i] = nz; 1892 nz = nz*mbs; 1893 } else { 1894 nz = 0; 1895 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1896 } 1897 1898 /* allocate the matrix space */ 1899 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1900 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1901 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1902 b->j = (int*)(b->a + nz*bs2); 1903 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1904 b->i = b->j + nz; 1905 b->singlemalloc = PETSC_TRUE; 1906 1907 b->i[0] = 0; 1908 for (i=1; i<mbs+1; i++) { 1909 b->i[i] = b->i[i-1] + b->imax[i-1]; 1910 } 1911 1912 /* b->ilen will count nonzeros in each block row so far. */ 1913 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1914 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1915 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1916 1917 b->bs = bs; 1918 b->bs2 = bs2; 1919 b->mbs = mbs; 1920 b->nz = 0; 1921 b->maxnz = nz*bs2; 1922 B->info.nz_unneeded = (PetscReal)b->maxnz; 1923 PetscFunctionReturn(0); 1924 } 1925 1926