1 /*$Id: baij.c,v 1.188 1999/11/05 14:45:32 bsmith Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiag_SeqBAIJ" 19 int MatMissingDiag_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 22 int *diag = a->diag, *jj = a->j,i; 23 24 PetscFunctionBegin; 25 for ( i=0; i<a->mbs; i++ ) { 26 if (jj[diag[i]] != i) { 27 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 28 } 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNC__ 34 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 35 int MatMarkDiag_SeqBAIJ(Mat A) 36 { 37 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 38 int i,j, *diag, m = a->mbs; 39 40 PetscFunctionBegin; 41 diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag); 42 PLogObjectMemory(A,(m+1)*sizeof(int)); 43 for ( i=0; i<m; i++ ) { 44 diag[i] = a->i[i+1]; 45 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 46 if (a->j[j] == i) { 47 diag[i] = j; 48 break; 49 } 50 } 51 } 52 a->diag = diag; 53 PetscFunctionReturn(0); 54 } 55 56 57 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 58 59 #undef __FUNC__ 60 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 61 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 62 PetscTruth *done) 63 { 64 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 65 int ierr, n = a->mbs,i; 66 67 PetscFunctionBegin; 68 *nn = n; 69 if (!ia) PetscFunctionReturn(0); 70 if (symmetric) { 71 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 72 } else if (oshift == 1) { 73 /* temporarily add 1 to i and j indices */ 74 int nz = a->i[n] + 1; 75 for ( i=0; i<nz; i++ ) a->j[i]++; 76 for ( i=0; i<n+1; i++ ) a->i[i]++; 77 *ia = a->i; *ja = a->j; 78 } else { 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNC__ 86 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 87 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 88 PetscTruth *done) 89 { 90 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 91 int i,n = a->mbs,ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 if (symmetric) { 96 ierr = PetscFree(*ia);CHKERRQ(ierr); 97 ierr = PetscFree(*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { 99 int nz = a->i[n]; 100 for ( i=0; i<nz; i++ ) a->j[i]--; 101 for ( i=0; i<n+1; i++ ) a->i[i]--; 102 } 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNC__ 107 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 108 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 109 { 110 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 111 112 PetscFunctionBegin; 113 *bs = baij->bs; 114 PetscFunctionReturn(0); 115 } 116 117 118 #undef __FUNC__ 119 #define __FUNC__ "MatDestroy_SeqBAIJ" 120 int MatDestroy_SeqBAIJ(Mat A) 121 { 122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 123 int ierr; 124 125 PetscFunctionBegin; 126 if (A->mapping) { 127 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 128 } 129 if (A->bmapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 131 } 132 if (A->rmap) { 133 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 134 } 135 if (A->cmap) { 136 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 137 } 138 #if defined(PETSC_USE_LOG) 139 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 140 #endif 141 ierr = PetscFree(a->a);CHKERRQ(ierr); 142 if (!a->singlemalloc) { 143 ierr = PetscFree(a->i);CHKERRQ(ierr); 144 ierr = PetscFree(a->j);CHKERRQ(ierr); 145 } 146 if (a->row) { 147 ierr = ISDestroy(a->row);CHKERRQ(ierr); 148 } 149 if (a->col) { 150 ierr = ISDestroy(a->col);CHKERRQ(ierr); 151 } 152 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 153 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 154 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 155 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 156 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 157 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 158 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 159 ierr = PetscFree(a);CHKERRQ(ierr); 160 PLogObjectDestroy(A); 161 PetscHeaderDestroy(A); 162 PetscFunctionReturn(0); 163 } 164 165 #undef __FUNC__ 166 #define __FUNC__ "MatSetOption_SeqBAIJ" 167 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 168 { 169 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 170 171 PetscFunctionBegin; 172 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 173 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 174 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 175 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 176 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 177 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 178 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 179 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 180 else if (op == MAT_ROWS_SORTED || 181 op == MAT_ROWS_UNSORTED || 182 op == MAT_SYMMETRIC || 183 op == MAT_STRUCTURALLY_SYMMETRIC || 184 op == MAT_YES_NEW_DIAGONALS || 185 op == MAT_IGNORE_OFF_PROC_ENTRIES || 186 op == MAT_USE_HASH_TABLE) { 187 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 188 } else if (op == MAT_NO_NEW_DIAGONALS) { 189 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 190 } else { 191 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 197 #undef __FUNC__ 198 #define __FUNC__ "MatGetSize_SeqBAIJ" 199 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 200 { 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 202 203 PetscFunctionBegin; 204 if (m) *m = a->m; 205 if (n) *n = a->n; 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNC__ 210 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 211 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 212 { 213 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 214 215 PetscFunctionBegin; 216 *m = 0; *n = a->m; 217 PetscFunctionReturn(0); 218 } 219 220 #undef __FUNC__ 221 #define __FUNC__ "MatGetRow_SeqBAIJ" 222 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 223 { 224 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 225 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 226 MatScalar *aa,*aa_i; 227 Scalar *v_i; 228 229 PetscFunctionBegin; 230 bs = a->bs; 231 ai = a->i; 232 aj = a->j; 233 aa = a->a; 234 bs2 = a->bs2; 235 236 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 237 238 bn = row/bs; /* Block number */ 239 bp = row % bs; /* Block Position */ 240 M = ai[bn+1] - ai[bn]; 241 *nz = bs*M; 242 243 if (v) { 244 *v = 0; 245 if (*nz) { 246 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v); 247 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 248 v_i = *v + i*bs; 249 aa_i = aa + bs2*(ai[bn] + i); 250 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 251 } 252 } 253 } 254 255 if (idx) { 256 *idx = 0; 257 if (*nz) { 258 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx); 259 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 260 idx_i = *idx + i*bs; 261 itmp = bs*aj[ai[bn] + i]; 262 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 263 } 264 } 265 } 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNC__ 270 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 271 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 272 { 273 int ierr; 274 275 PetscFunctionBegin; 276 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 277 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNC__ 282 #define __FUNC__ "MatTranspose_SeqBAIJ" 283 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 284 { 285 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 286 Mat C; 287 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 288 int *rows,*cols,bs2=a->bs2; 289 MatScalar *array = a->a; 290 291 PetscFunctionBegin; 292 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 293 col = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 294 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 295 296 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 297 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 298 ierr = PetscFree(col);CHKERRQ(ierr); 299 rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 300 cols = rows + bs; 301 for ( i=0; i<mbs; i++ ) { 302 cols[0] = i*bs; 303 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 304 len = ai[i+1] - ai[i]; 305 for ( j=0; j<len; j++ ) { 306 rows[0] = (*aj++)*bs; 307 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 308 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 309 array += bs2; 310 } 311 } 312 ierr = PetscFree(rows);CHKERRQ(ierr); 313 314 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 315 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 316 317 if (B != PETSC_NULL) { 318 *B = C; 319 } else { 320 PetscOps *Abops; 321 MatOps Aops; 322 323 /* This isn't really an in-place transpose */ 324 ierr = PetscFree(a->a);CHKERRQ(ierr); 325 if (!a->singlemalloc) { 326 ierr = PetscFree(a->i);CHKERRQ(ierr); 327 ierr = PetscFree(a->j);CHKERRQ(ierr); 328 } 329 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 330 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 331 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 332 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 333 ierr = PetscFree(a);CHKERRQ(ierr); 334 335 336 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 337 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 338 339 /* 340 This is horrible, horrible code. We need to keep the 341 A pointers for the bops and ops but copy everything 342 else from C. 343 */ 344 Abops = A->bops; 345 Aops = A->ops; 346 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 347 A->bops = Abops; 348 A->ops = Aops; 349 A->qlist = 0; 350 351 PetscHeaderDestroy(C); 352 } 353 PetscFunctionReturn(0); 354 } 355 356 #undef __FUNC__ 357 #define __FUNC__ "MatView_SeqBAIJ_Binary" 358 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 359 { 360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 361 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 362 Scalar *aa; 363 FILE *file; 364 365 PetscFunctionBegin; 366 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 367 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 368 col_lens[0] = MAT_COOKIE; 369 370 col_lens[1] = a->m; 371 col_lens[2] = a->n; 372 col_lens[3] = a->nz*bs2; 373 374 /* store lengths of each row and write (including header) to file */ 375 count = 0; 376 for ( i=0; i<a->mbs; i++ ) { 377 for ( j=0; j<bs; j++ ) { 378 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 379 } 380 } 381 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 382 ierr = PetscFree(col_lens);CHKERRQ(ierr); 383 384 /* store column indices (zero start index) */ 385 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj); 386 count = 0; 387 for ( i=0; i<a->mbs; i++ ) { 388 for ( j=0; j<bs; j++ ) { 389 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 390 for ( l=0; l<bs; l++ ) { 391 jj[count++] = bs*a->j[k] + l; 392 } 393 } 394 } 395 } 396 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 397 ierr = PetscFree(jj);CHKERRQ(ierr); 398 399 /* store nonzero values */ 400 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 401 count = 0; 402 for ( i=0; i<a->mbs; i++ ) { 403 for ( j=0; j<bs; j++ ) { 404 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 405 for ( l=0; l<bs; l++ ) { 406 aa[count++] = a->a[bs2*k + l*bs + j]; 407 } 408 } 409 } 410 } 411 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 412 ierr = PetscFree(aa);CHKERRQ(ierr); 413 414 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 415 if (file) { 416 fprintf(file,"-matload_block_size %d\n",a->bs); 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNC__ 422 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 423 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 424 { 425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 426 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 427 char *outputname; 428 429 PetscFunctionBegin; 430 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 431 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 432 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 433 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 434 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 435 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 436 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 437 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 438 for ( i=0; i<a->mbs; i++ ) { 439 for ( j=0; j<bs; j++ ) { 440 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 441 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 442 for ( l=0; l<bs; l++ ) { 443 #if defined(PETSC_USE_COMPLEX) 444 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 445 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 446 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 447 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 448 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 449 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 450 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 452 } 453 #else 454 if (a->a[bs2*k + l*bs + j] != 0.0) { 455 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 456 } 457 #endif 458 } 459 } 460 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 461 } 462 } 463 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 464 } else { 465 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 466 for ( i=0; i<a->mbs; i++ ) { 467 for ( j=0; j<bs; j++ ) { 468 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 469 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 470 for ( l=0; l<bs; l++ ) { 471 #if defined(PETSC_USE_COMPLEX) 472 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 473 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 474 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 475 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 476 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 477 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 478 } else { 479 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 480 } 481 #else 482 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 483 #endif 484 } 485 } 486 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 487 } 488 } 489 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 490 } 491 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNC__ 496 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 497 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 498 { 499 Mat A = (Mat) Aa; 500 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 501 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 502 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 503 MatScalar *aa; 504 MPI_Comm comm; 505 Viewer viewer; 506 507 PetscFunctionBegin; 508 /* 509 This is nasty. If this is called from an originally parallel matrix 510 then all processes call this, but only the first has the matrix so the 511 rest should return immediately. 512 */ 513 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 514 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 515 if (rank) PetscFunctionReturn(0); 516 517 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 518 519 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 520 521 /* loop over matrix elements drawing boxes */ 522 color = DRAW_BLUE; 523 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 524 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 525 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 526 x_l = a->j[j]*bs; x_r = x_l + 1.0; 527 aa = a->a + j*bs2; 528 for ( k=0; k<bs; k++ ) { 529 for ( l=0; l<bs; l++ ) { 530 if (PetscReal(*aa++) >= 0.) continue; 531 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 532 } 533 } 534 } 535 } 536 color = DRAW_CYAN; 537 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 538 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 539 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 540 x_l = a->j[j]*bs; x_r = x_l + 1.0; 541 aa = a->a + j*bs2; 542 for ( k=0; k<bs; k++ ) { 543 for ( l=0; l<bs; l++ ) { 544 if (PetscReal(*aa++) != 0.) continue; 545 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 546 } 547 } 548 } 549 } 550 551 color = DRAW_RED; 552 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 553 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 554 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 555 x_l = a->j[j]*bs; x_r = x_l + 1.0; 556 aa = a->a + j*bs2; 557 for ( k=0; k<bs; k++ ) { 558 for ( l=0; l<bs; l++ ) { 559 if (PetscReal(*aa++) <= 0.) continue; 560 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 561 } 562 } 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNC__ 569 #define __FUNC__ "MatView_SeqBAIJ_Draw" 570 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 571 { 572 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 573 int ierr; 574 double xl,yl,xr,yr,w,h; 575 Draw draw; 576 PetscTruth isnull; 577 578 PetscFunctionBegin; 579 580 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 581 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 582 583 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 584 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 585 xr += w; yr += h; xl = -w; yl = -h; 586 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 587 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 588 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 589 PetscFunctionReturn(0); 590 } 591 592 #undef __FUNC__ 593 #define __FUNC__ "MatView_SeqBAIJ" 594 int MatView_SeqBAIJ(Mat A,Viewer viewer) 595 { 596 int ierr; 597 PetscTruth issocket,isascii,isbinary,isdraw; 598 599 PetscFunctionBegin; 600 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 601 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 602 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 603 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 604 if (issocket) { 605 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 606 } else if (isascii){ 607 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 608 } else if (isbinary) { 609 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 610 } else if (isdraw) { 611 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 612 } else { 613 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 614 } 615 PetscFunctionReturn(0); 616 } 617 618 619 #undef __FUNC__ 620 #define __FUNC__ "MatGetValues_SeqBAIJ" 621 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 622 { 623 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 624 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 625 int *ai = a->i, *ailen = a->ilen; 626 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 627 MatScalar *ap, *aa = a->a, zero = 0.0; 628 629 PetscFunctionBegin; 630 for ( k=0; k<m; k++ ) { /* loop over rows */ 631 row = im[k]; brow = row/bs; 632 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 633 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 634 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 635 nrow = ailen[brow]; 636 for ( l=0; l<n; l++ ) { /* loop over columns */ 637 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 638 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 639 col = in[l] ; 640 bcol = col/bs; 641 cidx = col%bs; 642 ridx = row%bs; 643 high = nrow; 644 low = 0; /* assume unsorted */ 645 while (high-low > 5) { 646 t = (low+high)/2; 647 if (rp[t] > bcol) high = t; 648 else low = t; 649 } 650 for ( i=low; i<high; i++ ) { 651 if (rp[i] > bcol) break; 652 if (rp[i] == bcol) { 653 *v++ = ap[bs2*i+bs*cidx+ridx]; 654 goto finished; 655 } 656 } 657 *v++ = zero; 658 finished:; 659 } 660 } 661 PetscFunctionReturn(0); 662 } 663 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 667 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 668 { 669 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 670 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 671 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 672 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 673 Scalar *value = v; 674 MatScalar *ap,*aa=a->a,*bap; 675 676 PetscFunctionBegin; 677 if (roworiented) { 678 stepval = (n-1)*bs; 679 } else { 680 stepval = (m-1)*bs; 681 } 682 for ( k=0; k<m; k++ ) { /* loop over added rows */ 683 row = im[k]; 684 if (row < 0) continue; 685 #if defined(PETSC_USE_BOPT_g) 686 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 687 #endif 688 rp = aj + ai[row]; 689 ap = aa + bs2*ai[row]; 690 rmax = imax[row]; 691 nrow = ailen[row]; 692 low = 0; 693 for ( l=0; l<n; l++ ) { /* loop over added columns */ 694 if (in[l] < 0) continue; 695 #if defined(PETSC_USE_BOPT_g) 696 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 697 #endif 698 col = in[l]; 699 if (roworiented) { 700 value = v + k*(stepval+bs)*bs + l*bs; 701 } else { 702 value = v + l*(stepval+bs)*bs + k*bs; 703 } 704 if (!sorted) low = 0; high = nrow; 705 while (high-low > 7) { 706 t = (low+high)/2; 707 if (rp[t] > col) high = t; 708 else low = t; 709 } 710 for ( i=low; i<high; i++ ) { 711 if (rp[i] > col) break; 712 if (rp[i] == col) { 713 bap = ap + bs2*i; 714 if (roworiented) { 715 if (is == ADD_VALUES) { 716 for ( ii=0; ii<bs; ii++,value+=stepval ) { 717 for (jj=ii; jj<bs2; jj+=bs ) { 718 bap[jj] += *value++; 719 } 720 } 721 } else { 722 for ( ii=0; ii<bs; ii++,value+=stepval ) { 723 for (jj=ii; jj<bs2; jj+=bs ) { 724 bap[jj] = *value++; 725 } 726 } 727 } 728 } else { 729 if (is == ADD_VALUES) { 730 for ( ii=0; ii<bs; ii++,value+=stepval ) { 731 for (jj=0; jj<bs; jj++ ) { 732 *bap++ += *value++; 733 } 734 } 735 } else { 736 for ( ii=0; ii<bs; ii++,value+=stepval ) { 737 for (jj=0; jj<bs; jj++ ) { 738 *bap++ = *value++; 739 } 740 } 741 } 742 } 743 goto noinsert2; 744 } 745 } 746 if (nonew == 1) goto noinsert2; 747 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 748 if (nrow >= rmax) { 749 /* there is no extra room in row, therefore enlarge */ 750 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 751 MatScalar *new_a; 752 753 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 754 755 /* malloc new storage space */ 756 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 757 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 758 new_j = (int *) (new_a + bs2*new_nz); 759 new_i = new_j + new_nz; 760 761 /* copy over old data into new slots */ 762 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 763 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 764 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 765 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 766 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 767 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 768 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 769 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 770 /* free up old matrix storage */ 771 ierr = PetscFree(a->a);CHKERRQ(ierr); 772 if (!a->singlemalloc) { 773 ierr = PetscFree(a->i);CHKERRQ(ierr); 774 ierr = PetscFree(a->j);CHKERRQ(ierr); 775 } 776 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 777 a->singlemalloc = 1; 778 779 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 780 rmax = imax[row] = imax[row] + CHUNKSIZE; 781 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 782 a->maxnz += bs2*CHUNKSIZE; 783 a->reallocs++; 784 a->nz++; 785 } 786 N = nrow++ - 1; 787 /* shift up all the later entries in this row */ 788 for ( ii=N; ii>=i; ii-- ) { 789 rp[ii+1] = rp[ii]; 790 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 791 } 792 if (N >= i) { 793 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 794 } 795 rp[i] = col; 796 bap = ap + bs2*i; 797 if (roworiented) { 798 for ( ii=0; ii<bs; ii++,value+=stepval) { 799 for (jj=ii; jj<bs2; jj+=bs ) { 800 bap[jj] = *value++; 801 } 802 } 803 } else { 804 for ( ii=0; ii<bs; ii++,value+=stepval ) { 805 for (jj=0; jj<bs; jj++ ) { 806 *bap++ = *value++; 807 } 808 } 809 } 810 noinsert2:; 811 low = i; 812 } 813 ailen[row] = nrow; 814 } 815 PetscFunctionReturn(0); 816 } 817 818 819 #undef __FUNC__ 820 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 821 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 822 { 823 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 824 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 825 int m = a->m,*ip, N, *ailen = a->ilen; 826 int mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr; 827 MatScalar *aa = a->a, *ap; 828 829 PetscFunctionBegin; 830 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 831 832 if (m) rmax = ailen[0]; 833 for ( i=1; i<mbs; i++ ) { 834 /* move each row back by the amount of empty slots (fshift) before it*/ 835 fshift += imax[i-1] - ailen[i-1]; 836 rmax = PetscMax(rmax,ailen[i]); 837 if (fshift) { 838 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 839 N = ailen[i]; 840 for ( j=0; j<N; j++ ) { 841 ip[j-fshift] = ip[j]; 842 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 843 } 844 } 845 ai[i] = ai[i-1] + ailen[i-1]; 846 } 847 if (mbs) { 848 fshift += imax[mbs-1] - ailen[mbs-1]; 849 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 850 } 851 /* reset ilen and imax for each row */ 852 for ( i=0; i<mbs; i++ ) { 853 ailen[i] = imax[i] = ai[i+1] - ai[i]; 854 } 855 a->nz = ai[mbs]; 856 857 /* diagonals may have moved, so kill the diagonal pointers */ 858 if (fshift && a->diag) { 859 ierr = PetscFree(a->diag);CHKERRQ(ierr); 860 PLogObjectMemory(A,-(m+1)*sizeof(int)); 861 a->diag = 0; 862 } 863 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 864 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 865 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 866 a->reallocs); 867 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 868 a->reallocs = 0; 869 A->info.nz_unneeded = (double)fshift*bs2; 870 871 PetscFunctionReturn(0); 872 } 873 874 875 876 /* 877 This function returns an array of flags which indicate the locations of contiguous 878 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 879 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 880 Assume: sizes should be long enough to hold all the values. 881 */ 882 #undef __FUNC__ 883 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 884 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 885 { 886 int i,j,k,row; 887 PetscTruth flg; 888 889 PetscFunctionBegin; 890 for ( i=0,j=0; i<n; j++ ) { 891 row = idx[i]; 892 if (row%bs!=0) { /* Not the begining of a block */ 893 sizes[j] = 1; 894 i++; 895 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 896 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 897 i++; 898 } else { /* Begining of the block, so check if the complete block exists */ 899 flg = PETSC_TRUE; 900 for ( k=1; k<bs; k++ ) { 901 if (row+k != idx[i+k]) { /* break in the block */ 902 flg = PETSC_FALSE; 903 break; 904 } 905 } 906 if (flg == PETSC_TRUE) { /* No break in the bs */ 907 sizes[j] = bs; 908 i+= bs; 909 } else { 910 sizes[j] = 1; 911 i++; 912 } 913 } 914 } 915 *bs_max = j; 916 PetscFunctionReturn(0); 917 } 918 919 #undef __FUNC__ 920 #define __FUNC__ "MatZeroRows_SeqBAIJ" 921 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 922 { 923 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 924 int ierr,i,j,k,count,is_n,*is_idx,*rows; 925 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 926 Scalar zero = 0.0; 927 MatScalar *aa; 928 929 PetscFunctionBegin; 930 /* Make a copy of the IS and sort it */ 931 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 932 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 933 934 /* allocate memory for rows,sizes */ 935 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 936 sizes = rows + is_n; 937 938 /* initialize copy IS valurs to rows, and sort them */ 939 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 940 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 941 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 942 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 943 944 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 945 row = rows[j]; 946 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 947 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 948 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 949 if (sizes[i] == bs) { 950 if (diag) { 951 if (baij->ilen[row/bs] > 0) { 952 baij->ilen[row/bs] = 1; 953 baij->j[baij->i[row/bs]] = row/bs; 954 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 955 } 956 /* Now insert all the diagoanl values for this bs */ 957 for ( k=0; k<bs; k++ ) { 958 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 959 } 960 } else { /* (!diag) */ 961 baij->ilen[row/bs] = 0; 962 } /* end (!diag) */ 963 } else { /* (sizes[i] != bs) */ 964 #if defined (PETSC_USE_DEBUG) 965 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 966 #endif 967 for ( k=0; k<count; k++ ) { 968 aa[0] = zero; 969 aa+=bs; 970 } 971 if (diag) { 972 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 973 } 974 } 975 } 976 977 ierr = PetscFree(rows);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNC__ 983 #define __FUNC__ "MatSetValues_SeqBAIJ" 984 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 985 { 986 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 987 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 988 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 989 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 990 int ridx,cidx,bs2=a->bs2,ierr; 991 MatScalar *ap,value,*aa=a->a,*bap; 992 993 PetscFunctionBegin; 994 for ( k=0; k<m; k++ ) { /* loop over added rows */ 995 row = im[k]; brow = row/bs; 996 if (row < 0) continue; 997 #if defined(PETSC_USE_BOPT_g) 998 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 999 #endif 1000 rp = aj + ai[brow]; 1001 ap = aa + bs2*ai[brow]; 1002 rmax = imax[brow]; 1003 nrow = ailen[brow]; 1004 low = 0; 1005 for ( l=0; l<n; l++ ) { /* loop over added columns */ 1006 if (in[l] < 0) continue; 1007 #if defined(PETSC_USE_BOPT_g) 1008 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1009 #endif 1010 col = in[l]; bcol = col/bs; 1011 ridx = row % bs; cidx = col % bs; 1012 if (roworiented) { 1013 value = v[l + k*n]; 1014 } else { 1015 value = v[k + l*m]; 1016 } 1017 if (!sorted) low = 0; high = nrow; 1018 while (high-low > 7) { 1019 t = (low+high)/2; 1020 if (rp[t] > bcol) high = t; 1021 else low = t; 1022 } 1023 for ( i=low; i<high; i++ ) { 1024 if (rp[i] > bcol) break; 1025 if (rp[i] == bcol) { 1026 bap = ap + bs2*i + bs*cidx + ridx; 1027 if (is == ADD_VALUES) *bap += value; 1028 else *bap = value; 1029 goto noinsert1; 1030 } 1031 } 1032 if (nonew == 1) goto noinsert1; 1033 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1034 if (nrow >= rmax) { 1035 /* there is no extra room in row, therefore enlarge */ 1036 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1037 MatScalar *new_a; 1038 1039 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1040 1041 /* Malloc new storage space */ 1042 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1043 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 1044 new_j = (int *) (new_a + bs2*new_nz); 1045 new_i = new_j + new_nz; 1046 1047 /* copy over old data into new slots */ 1048 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1049 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1050 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1051 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1052 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1053 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1054 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1055 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1056 /* free up old matrix storage */ 1057 ierr = PetscFree(a->a);CHKERRQ(ierr); 1058 if (!a->singlemalloc) { 1059 ierr = PetscFree(a->i);CHKERRQ(ierr); 1060 ierr = PetscFree(a->j);CHKERRQ(ierr); 1061 } 1062 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1063 a->singlemalloc = 1; 1064 1065 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1066 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1067 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1068 a->maxnz += bs2*CHUNKSIZE; 1069 a->reallocs++; 1070 a->nz++; 1071 } 1072 N = nrow++ - 1; 1073 /* shift up all the later entries in this row */ 1074 for ( ii=N; ii>=i; ii-- ) { 1075 rp[ii+1] = rp[ii]; 1076 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1077 } 1078 if (N>=i) { 1079 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1080 } 1081 rp[i] = bcol; 1082 ap[bs2*i + bs*cidx + ridx] = value; 1083 noinsert1:; 1084 low = i; 1085 } 1086 ailen[brow] = nrow; 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1092 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1093 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1094 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1095 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1096 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1097 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1098 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1099 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1100 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1101 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1102 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1103 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1104 extern int MatZeroEntries_SeqBAIJ(Mat); 1105 1106 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1107 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1108 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1109 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1110 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1111 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1112 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1113 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1114 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec); 1115 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec); 1116 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec); 1117 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec); 1118 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec); 1119 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec); 1120 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec); 1121 1122 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1123 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1124 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1125 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1126 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1127 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1128 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1129 1130 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1131 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1132 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1133 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1134 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1135 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1136 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1137 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1138 1139 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1140 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1141 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1142 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1143 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1144 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1145 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1146 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1147 1148 #undef __FUNC__ 1149 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1150 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1151 { 1152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1153 Mat outA; 1154 int ierr; 1155 PetscTruth row_identity, col_identity; 1156 1157 PetscFunctionBegin; 1158 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1159 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1160 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1161 if (!row_identity || !col_identity) { 1162 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1163 } 1164 1165 outA = inA; 1166 inA->factor = FACTOR_LU; 1167 1168 if (!a->diag) { 1169 ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr); 1170 } 1171 /* 1172 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1173 for ILU(0) factorization with natural ordering 1174 */ 1175 switch (a->bs) { 1176 case 1: 1177 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1178 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1179 case 2: 1180 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1181 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1182 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1183 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1184 break; 1185 case 3: 1186 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1187 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1188 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_3_NaturalOrdering; 1189 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1190 break; 1191 case 4: 1192 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1193 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1194 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_4_NaturalOrdering; 1195 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1196 break; 1197 case 5: 1198 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1199 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1200 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_5_NaturalOrdering; 1201 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1202 break; 1203 case 6: 1204 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1205 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1206 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_6_NaturalOrdering; 1207 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1208 break; 1209 case 7: 1210 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1211 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_7_NaturalOrdering; 1212 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1213 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1214 break; 1215 default: 1216 a->row = row; 1217 a->col = col; 1218 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1219 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1220 1221 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1222 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1223 PLogObjectParent(inA,a->icol); 1224 1225 if (!a->solve_work) { 1226 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1227 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1228 } 1229 } 1230 1231 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1232 1233 PetscFunctionReturn(0); 1234 } 1235 #undef __FUNC__ 1236 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1237 int MatPrintHelp_SeqBAIJ(Mat A) 1238 { 1239 static PetscTruth called = PETSC_FALSE; 1240 MPI_Comm comm = A->comm; 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1245 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1246 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 EXTERN_C_BEGIN 1251 #undef __FUNC__ 1252 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1253 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1254 { 1255 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1256 int i,nz,n; 1257 1258 PetscFunctionBegin; 1259 nz = baij->maxnz; 1260 n = baij->n; 1261 for (i=0; i<nz; i++) { 1262 baij->j[i] = indices[i]; 1263 } 1264 baij->nz = nz; 1265 for ( i=0; i<n; i++ ) { 1266 baij->ilen[i] = baij->imax[i]; 1267 } 1268 1269 PetscFunctionReturn(0); 1270 } 1271 EXTERN_C_END 1272 1273 #undef __FUNC__ 1274 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1275 /*@ 1276 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1277 in the matrix. 1278 1279 Input Parameters: 1280 + mat - the SeqBAIJ matrix 1281 - indices - the column indices 1282 1283 Level: advanced 1284 1285 Notes: 1286 This can be called if you have precomputed the nonzero structure of the 1287 matrix and want to provide it to the matrix object to improve the performance 1288 of the MatSetValues() operation. 1289 1290 You MUST have set the correct numbers of nonzeros per row in the call to 1291 MatCreateSeqBAIJ(). 1292 1293 MUST be called before any calls to MatSetValues(); 1294 1295 @*/ 1296 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1297 { 1298 int ierr,(*f)(Mat,int *); 1299 1300 PetscFunctionBegin; 1301 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1302 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1303 if (f) { 1304 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1305 } else { 1306 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 /* -------------------------------------------------------------------*/ 1312 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1313 MatGetRow_SeqBAIJ, 1314 MatRestoreRow_SeqBAIJ, 1315 MatMult_SeqBAIJ_N, 1316 MatMultAdd_SeqBAIJ_N, 1317 MatMultTrans_SeqBAIJ, 1318 MatMultTransAdd_SeqBAIJ, 1319 MatSolve_SeqBAIJ_N, 1320 0, 1321 0, 1322 0, 1323 MatLUFactor_SeqBAIJ, 1324 0, 1325 0, 1326 MatTranspose_SeqBAIJ, 1327 MatGetInfo_SeqBAIJ, 1328 MatEqual_SeqBAIJ, 1329 MatGetDiagonal_SeqBAIJ, 1330 MatDiagonalScale_SeqBAIJ, 1331 MatNorm_SeqBAIJ, 1332 0, 1333 MatAssemblyEnd_SeqBAIJ, 1334 0, 1335 MatSetOption_SeqBAIJ, 1336 MatZeroEntries_SeqBAIJ, 1337 MatZeroRows_SeqBAIJ, 1338 MatLUFactorSymbolic_SeqBAIJ, 1339 MatLUFactorNumeric_SeqBAIJ_N, 1340 0, 1341 0, 1342 MatGetSize_SeqBAIJ, 1343 MatGetSize_SeqBAIJ, 1344 MatGetOwnershipRange_SeqBAIJ, 1345 MatILUFactorSymbolic_SeqBAIJ, 1346 0, 1347 0, 1348 0, 1349 MatDuplicate_SeqBAIJ, 1350 0, 1351 0, 1352 MatILUFactor_SeqBAIJ, 1353 0, 1354 0, 1355 MatGetSubMatrices_SeqBAIJ, 1356 MatIncreaseOverlap_SeqBAIJ, 1357 MatGetValues_SeqBAIJ, 1358 0, 1359 MatPrintHelp_SeqBAIJ, 1360 MatScale_SeqBAIJ, 1361 0, 1362 0, 1363 0, 1364 MatGetBlockSize_SeqBAIJ, 1365 MatGetRowIJ_SeqBAIJ, 1366 MatRestoreRowIJ_SeqBAIJ, 1367 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 0, 1373 MatSetValuesBlocked_SeqBAIJ, 1374 MatGetSubMatrix_SeqBAIJ, 1375 0, 1376 0, 1377 MatGetMaps_Petsc}; 1378 1379 EXTERN_C_BEGIN 1380 #undef __FUNC__ 1381 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1382 int MatStoreValues_SeqBAIJ(Mat mat) 1383 { 1384 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1385 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1386 int ierr; 1387 1388 PetscFunctionBegin; 1389 if (aij->nonew != 1) { 1390 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1391 } 1392 1393 /* allocate space for values if not already there */ 1394 if (!aij->saved_values) { 1395 aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1396 } 1397 1398 /* copy values over */ 1399 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 EXTERN_C_END 1403 1404 EXTERN_C_BEGIN 1405 #undef __FUNC__ 1406 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1407 int MatRetrieveValues_SeqBAIJ(Mat mat) 1408 { 1409 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1410 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1411 1412 PetscFunctionBegin; 1413 if (aij->nonew != 1) { 1414 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1415 } 1416 if (!aij->saved_values) { 1417 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1418 } 1419 1420 /* copy values over */ 1421 ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 EXTERN_C_END 1425 1426 #undef __FUNC__ 1427 #define __FUNC__ "MatCreateSeqBAIJ" 1428 /*@C 1429 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1430 compressed row) format. For good matrix assembly performance the 1431 user should preallocate the matrix storage by setting the parameter nz 1432 (or the array nnz). By setting these parameters accurately, performance 1433 during matrix assembly can be increased by more than a factor of 50. 1434 1435 Collective on MPI_Comm 1436 1437 Input Parameters: 1438 + comm - MPI communicator, set to PETSC_COMM_SELF 1439 . bs - size of block 1440 . m - number of rows 1441 . n - number of columns 1442 . nz - number of block nonzeros per block row (same for all rows) 1443 - nnz - array containing the number of block nonzeros in the various block rows 1444 (possibly different for each block row) or PETSC_NULL 1445 1446 Output Parameter: 1447 . A - the matrix 1448 1449 Options Database Keys: 1450 . -mat_no_unroll - uses code that does not unroll the loops in the 1451 block calculations (much slower) 1452 . -mat_block_size - size of the blocks to use 1453 1454 Level: intermediate 1455 1456 Notes: 1457 The block AIJ format is fully compatible with standard Fortran 77 1458 storage. That is, the stored row and column indices can begin at 1459 either one (as in Fortran) or zero. See the users' manual for details. 1460 1461 Specify the preallocated storage with either nz or nnz (not both). 1462 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1463 allocation. For additional details, see the users manual chapter on 1464 matrices. 1465 1466 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1467 @*/ 1468 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1469 { 1470 Mat B; 1471 Mat_SeqBAIJ *b; 1472 int i,len,ierr,mbs,nbs,bs2,size; 1473 PetscTruth flg; 1474 1475 PetscFunctionBegin; 1476 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1477 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1478 1479 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1480 mbs = m/bs; 1481 nbs = n/bs; 1482 bs2 = bs*bs; 1483 1484 if (mbs*bs!=m || nbs*bs!=n) { 1485 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1486 } 1487 1488 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1489 if (nnz) { 1490 for (i=0; i<mbs; i++) { 1491 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1492 } 1493 } 1494 1495 *A = 0; 1496 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1497 PLogObjectCreate(B); 1498 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1499 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1500 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1501 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1502 if (!flg) { 1503 switch (bs) { 1504 case 1: 1505 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1506 B->ops->solve = MatSolve_SeqBAIJ_1; 1507 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_1; 1508 B->ops->mult = MatMult_SeqBAIJ_1; 1509 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1510 break; 1511 case 2: 1512 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1513 B->ops->solve = MatSolve_SeqBAIJ_2; 1514 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_2; 1515 B->ops->mult = MatMult_SeqBAIJ_2; 1516 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1517 break; 1518 case 3: 1519 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1520 B->ops->solve = MatSolve_SeqBAIJ_3; 1521 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_3; 1522 B->ops->mult = MatMult_SeqBAIJ_3; 1523 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1524 break; 1525 case 4: 1526 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1527 B->ops->solve = MatSolve_SeqBAIJ_4; 1528 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_4; 1529 B->ops->mult = MatMult_SeqBAIJ_4; 1530 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1531 break; 1532 case 5: 1533 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1534 B->ops->solve = MatSolve_SeqBAIJ_5; 1535 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_5; 1536 B->ops->mult = MatMult_SeqBAIJ_5; 1537 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1538 break; 1539 case 6: 1540 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1541 B->ops->solve = MatSolve_SeqBAIJ_6; 1542 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_6; 1543 B->ops->mult = MatMult_SeqBAIJ_6; 1544 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1545 break; 1546 case 7: 1547 B->ops->mult = MatMult_SeqBAIJ_7; 1548 B->ops->solve = MatSolve_SeqBAIJ_7; 1549 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_7; 1550 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1551 break; 1552 } 1553 } 1554 B->ops->destroy = MatDestroy_SeqBAIJ; 1555 B->ops->view = MatView_SeqBAIJ; 1556 B->factor = 0; 1557 B->lupivotthreshold = 1.0; 1558 B->mapping = 0; 1559 b->row = 0; 1560 b->col = 0; 1561 b->icol = 0; 1562 b->reallocs = 0; 1563 b->saved_values = 0; 1564 1565 b->m = m; B->m = m; B->M = m; 1566 b->n = n; B->n = n; B->N = n; 1567 1568 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1569 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1570 1571 b->mbs = mbs; 1572 b->nbs = nbs; 1573 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax); 1574 if (nnz == PETSC_NULL) { 1575 if (nz == PETSC_DEFAULT) nz = 5; 1576 else if (nz <= 0) nz = 1; 1577 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1578 nz = nz*mbs; 1579 } else { 1580 nz = 0; 1581 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1582 } 1583 1584 /* allocate the matrix space */ 1585 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1586 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1587 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1588 b->j = (int *) (b->a + nz*bs2); 1589 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1590 b->i = b->j + nz; 1591 b->singlemalloc = 1; 1592 1593 b->i[0] = 0; 1594 for (i=1; i<mbs+1; i++) { 1595 b->i[i] = b->i[i-1] + b->imax[i-1]; 1596 } 1597 1598 /* b->ilen will count nonzeros in each block row so far. */ 1599 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1600 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1601 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1602 1603 b->bs = bs; 1604 b->bs2 = bs2; 1605 b->mbs = mbs; 1606 b->nz = 0; 1607 b->maxnz = nz*bs2; 1608 b->sorted = 0; 1609 b->roworiented = 1; 1610 b->nonew = 0; 1611 b->diag = 0; 1612 b->solve_work = 0; 1613 b->mult_work = 0; 1614 b->spptr = 0; 1615 B->info.nz_unneeded = (double)b->maxnz; 1616 1617 *A = B; 1618 ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr); 1619 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1620 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1622 "MatStoreValues_SeqBAIJ", 1623 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1625 "MatRetrieveValues_SeqBAIJ", 1626 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1628 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1629 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNC__ 1634 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1635 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1636 { 1637 Mat C; 1638 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1639 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1640 1641 PetscFunctionBegin; 1642 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1643 1644 *B = 0; 1645 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1646 PLogObjectCreate(C); 1647 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1648 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1649 C->ops->destroy = MatDestroy_SeqBAIJ; 1650 C->ops->view = MatView_SeqBAIJ; 1651 C->factor = A->factor; 1652 c->row = 0; 1653 c->col = 0; 1654 c->icol = 0; 1655 c->saved_values = 0; 1656 C->assembled = PETSC_TRUE; 1657 1658 c->m = C->m = a->m; 1659 c->n = C->n = a->n; 1660 C->M = a->m; 1661 C->N = a->n; 1662 1663 c->bs = a->bs; 1664 c->bs2 = a->bs2; 1665 c->mbs = a->mbs; 1666 c->nbs = a->nbs; 1667 1668 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1669 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1670 for ( i=0; i<mbs; i++ ) { 1671 c->imax[i] = a->imax[i]; 1672 c->ilen[i] = a->ilen[i]; 1673 } 1674 1675 /* allocate the matrix space */ 1676 c->singlemalloc = 1; 1677 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1678 c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a); 1679 c->j = (int *) (c->a + nz*bs2); 1680 c->i = c->j + nz; 1681 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1682 if (mbs > 0) { 1683 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1684 if (cpvalues == MAT_COPY_VALUES) { 1685 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1686 } else { 1687 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1688 } 1689 } 1690 1691 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1692 c->sorted = a->sorted; 1693 c->roworiented = a->roworiented; 1694 c->nonew = a->nonew; 1695 1696 if (a->diag) { 1697 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag); 1698 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1699 for ( i=0; i<mbs; i++ ) { 1700 c->diag[i] = a->diag[i]; 1701 } 1702 } else c->diag = 0; 1703 c->nz = a->nz; 1704 c->maxnz = a->maxnz; 1705 c->solve_work = 0; 1706 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1707 c->mult_work = 0; 1708 *B = C; 1709 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNC__ 1714 #define __FUNC__ "MatLoad_SeqBAIJ" 1715 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1716 { 1717 Mat_SeqBAIJ *a; 1718 Mat B; 1719 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1720 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1721 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1722 int *masked, nmask,tmp,bs2,ishift; 1723 Scalar *aa; 1724 MPI_Comm comm = ((PetscObject) viewer)->comm; 1725 1726 PetscFunctionBegin; 1727 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1728 bs2 = bs*bs; 1729 1730 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1731 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1732 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1733 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1734 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1735 M = header[1]; N = header[2]; nz = header[3]; 1736 1737 if (header[3] < 0) { 1738 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1739 } 1740 1741 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1742 1743 /* 1744 This code adds extra rows to make sure the number of rows is 1745 divisible by the blocksize 1746 */ 1747 mbs = M/bs; 1748 extra_rows = bs - M + bs*(mbs); 1749 if (extra_rows == bs) extra_rows = 0; 1750 else mbs++; 1751 if (extra_rows) { 1752 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1753 } 1754 1755 /* read in row lengths */ 1756 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1757 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1758 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1759 1760 /* read in column indices */ 1761 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj); 1762 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1763 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1764 1765 /* loop over row lengths determining block row lengths */ 1766 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1767 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1768 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask); 1769 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1770 masked = mask + mbs; 1771 rowcount = 0; nzcount = 0; 1772 for ( i=0; i<mbs; i++ ) { 1773 nmask = 0; 1774 for ( j=0; j<bs; j++ ) { 1775 kmax = rowlengths[rowcount]; 1776 for ( k=0; k<kmax; k++ ) { 1777 tmp = jj[nzcount++]/bs; 1778 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1779 } 1780 rowcount++; 1781 } 1782 browlengths[i] += nmask; 1783 /* zero out the mask elements we set */ 1784 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1785 } 1786 1787 /* create our matrix */ 1788 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1789 B = *A; 1790 a = (Mat_SeqBAIJ *) B->data; 1791 1792 /* set matrix "i" values */ 1793 a->i[0] = 0; 1794 for ( i=1; i<= mbs; i++ ) { 1795 a->i[i] = a->i[i-1] + browlengths[i-1]; 1796 a->ilen[i-1] = browlengths[i-1]; 1797 } 1798 a->nz = 0; 1799 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1800 1801 /* read in nonzero values */ 1802 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1803 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1804 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1805 1806 /* set "a" and "j" values into matrix */ 1807 nzcount = 0; jcount = 0; 1808 for ( i=0; i<mbs; i++ ) { 1809 nzcountb = nzcount; 1810 nmask = 0; 1811 for ( j=0; j<bs; j++ ) { 1812 kmax = rowlengths[i*bs+j]; 1813 for ( k=0; k<kmax; k++ ) { 1814 tmp = jj[nzcount++]/bs; 1815 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1816 } 1817 rowcount++; 1818 } 1819 /* sort the masked values */ 1820 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1821 1822 /* set "j" values into matrix */ 1823 maskcount = 1; 1824 for ( j=0; j<nmask; j++ ) { 1825 a->j[jcount++] = masked[j]; 1826 mask[masked[j]] = maskcount++; 1827 } 1828 /* set "a" values into matrix */ 1829 ishift = bs2*a->i[i]; 1830 for ( j=0; j<bs; j++ ) { 1831 kmax = rowlengths[i*bs+j]; 1832 for ( k=0; k<kmax; k++ ) { 1833 tmp = jj[nzcountb]/bs ; 1834 block = mask[tmp] - 1; 1835 point = jj[nzcountb] - bs*tmp; 1836 idx = ishift + bs2*block + j + bs*point; 1837 a->a[idx] = aa[nzcountb++]; 1838 } 1839 } 1840 /* zero out the mask elements we set */ 1841 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1842 } 1843 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1844 1845 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1846 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1847 ierr = PetscFree(aa);CHKERRQ(ierr); 1848 ierr = PetscFree(jj);CHKERRQ(ierr); 1849 ierr = PetscFree(mask);CHKERRQ(ierr); 1850 1851 B->assembled = PETSC_TRUE; 1852 1853 ierr = MatView_Private(B);CHKERRQ(ierr); 1854 PetscFunctionReturn(0); 1855 } 1856 1857 1858 1859