MISR Toolkit  1.5.1
H5ACpkg.h
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * Copyright by The HDF Group. *
3  * Copyright by the Board of Trustees of the University of Illinois. *
4  * All rights reserved. *
5  * *
6  * This file is part of HDF5. The full HDF5 copyright notice, including *
7  * terms governing use, modification, and redistribution, is contained in *
8  * the COPYING file, which can be found at the root of the source code *
9  * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases. *
10  * If you do not have access to either file, you may request a copy from *
11  * help@hdfgroup.org. *
12  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13 
14 /*
15  * Programmer: John Mainzer -- 4/19/06
16  *
17  * Purpose: This file contains declarations which are normally visible
18  * only within the H5AC package (just H5AC.c at present).
19  *
20  * Source files outside the H5AC package should include
21  * H5ACprivate.h instead.
22  *
23  * The one exception to this rule is testpar/t_cache.c. The
24  * test code is easier to write if it can look at H5AC_aux_t.
25  * Indeed, this is the main reason why this file was created.
26  *
27  */
28 
29 #ifndef H5AC_PACKAGE
30 #error "Do not include this file outside the H5AC package!"
31 #endif
32 
33 #ifndef _H5ACpkg_H
34 #define _H5ACpkg_H
35 
36 /* Get package's private header */
37 #include "H5ACprivate.h" /* Metadata cache */
38 
39 
40 /* Get needed headers */
41 #include "H5Cprivate.h" /* Cache */
42 #include "H5SLprivate.h" /* Skip lists */
43 
44 
45 #define H5AC_DEBUG_DIRTY_BYTES_CREATION 0 /* non-zero compiles the dirty-bytes debugging counters into H5AC_aux_t */
46 
47 #ifdef H5_HAVE_PARALLEL
48 
49 /* The following #defines are used to specify the operation required
50  * at a sync point.
51  */
52 
53 #define H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN 0
54 #define H5AC_SYNC_POINT_OP__FLUSH_CACHE 1
55 
56 #endif /* H5_HAVE_PARALLEL */
57 
58 /*-------------------------------------------------------------------------
59  * It is a bit difficult to set ranges of allowable values on the
60  * dirty_bytes_threshold field of H5AC_aux_t. The following are
61  * probably broader than they should be.
62  *-------------------------------------------------------------------------
63  */
64 
65 #define H5AC__MIN_DIRTY_BYTES_THRESHOLD \
66  ((int32_t)(H5C__MIN_MAX_CACHE_SIZE / 2)) /* lower bound of the allowable range; fully parenthesized so the cast binds as one unit */
67 #define H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD (256 * 1024) /* default threshold: 256 KiB */
68 #define H5AC__MAX_DIRTY_BYTES_THRESHOLD \
69  ((int32_t)(H5C__MAX_MAX_CACHE_SIZE / 4)) /* upper bound of the allowable range; fully parenthesized so the cast binds as one unit */
70 
71 
72 /****************************************************************************
73  *
74  * structure H5AC_aux_t
75  *
76  * While H5AC has become a wrapper for the cache implemented in H5C.c, there
77  * are some features of the metadata cache that are specific to it, and which
78  * therefore do not belong in the more generic H5C cache code.
79  *
80  * In particular, there is the matter of synchronizing writes from the
81  * metadata cache to disk in the PHDF5 case.
82  *
83  * Prior to this update, the presumption was that all metadata caches would
84  * write the same data at the same time since all operations modifying
85  * metadata must be performed collectively. Given this assumption, it was
86  * safe to allow only the writes from process 0 to actually make it to disk,
87  * while metadata writes from all other processes were discarded.
88  *
89  * Unfortunately, this presumption is in error as operations that read
90  * metadata need not be collective, but can change the location of dirty
91  * entries in the metadata cache LRU lists. This can result in the same
92  * metadata write operation triggering writes from the metadata caches on
93  * some processes, but not all (causing a hang), or in different sets of
94  * entries being written from different caches (potentially resulting in
95  * metadata corruption in the file).
96  *
97  * To deal with this issue, I decided to apply a paradigm shift to the way
98  * metadata is written to disk.
99  *
100  * With this set of changes, only the metadata cache on process 0 is able
101  * to write metadata to disk, although metadata caches on all other
102  * processes can read metadata from disk as before.
103  *
104  * To keep all the other caches from getting plugged up with dirty metadata,
105  * process 0 periodically broadcasts a list of entries that it has flushed
106  * since that last notice, and which are currently clean. The other caches
107  * mark these entries as clean as well, which allows them to evict the
108  * entries as needed.
109  *
110  * One obvious problem in this approach is synchronizing the broadcasts
111  * and receptions, as different caches may see different amounts of
112  * activity.
113  *
114  * The current solution is for the caches to track the number of bytes
115  * of newly generated dirty metadata, and to broadcast and receive
116  * whenever this value exceeds some user specified threshold.
117  *
118  * Maintaining this count is easy for all processes not on process 0 --
119  * all that is necessary is to add the size of the entry to the total
120  * whenever there is an insertion, a move of a previously clean entry,
121  * or whenever a previously clean entry is marked dirty in an unprotect.
122  *
123  * On process 0, we have to be careful not to count dirty bytes twice.
124  * If an entry is marked dirty, flushed, and marked dirty again, all
125  * within a single reporting period, only the first marking should
126  * be added to the dirty bytes generated tally, as that is all that
127  * the other processes will see.
128  *
129  * At present, this structure exists to maintain the fields needed to
130  * implement the above scheme, and thus is only used in the parallel
131  * case. However, other uses may arise in the future.
132  *
133  * Instances of this structure are associated with metadata caches via
134  * the aux_ptr field of H5C_t (see H5Cpkg.h). The H5AC code is
135  * responsible for allocating, maintaining, and discarding instances
136  * of H5AC_aux_t.
137  *
138  * The remainder of this header comment documents the individual fields
139  * of the structure.
140  *
141  * JRM - 6/27/05
142  *
143  * magic: Unsigned 32 bit integer always set to
144  * H5AC__H5AC_AUX_T_MAGIC. This field is used to validate
145  * pointers to instances of H5AC_aux_t.
146  *
147  * mpi_comm: MPI communicator associated with the file for which the
148  * cache has been created.
149  *
150  * mpi_rank: MPI rank of this process within mpi_comm.
151  *
152  * mpi_size: Number of processes in mpi_comm.
153  *
154  * write_permitted: Boolean flag used to control whether the cache
155  * is permitted to write to file.
156  *
157  * dirty_bytes_threshold: Integer field containing the dirty bytes
158  * generation threshold. Whenever dirty byte creation
159  * exceeds this value, the metadata cache on process 0
160  * broadcasts a list of the entries it has flushed since
161  * the last broadcast (or since the beginning of execution)
162  * and which are currently clean (if they are still in the
163  * cache)
164  *
165  * Similarly, metadata caches on processes other than process
166  * 0 will attempt to receive a list of clean entries whenever
167  * the threshold is exceeded.
168  *
169  * dirty_bytes: Integer field containing the number of bytes of dirty
170  * metadata generated since the beginning of the computation,
171  * or (more typically) since the last clean entries list
172  * broadcast. This field is reset to zero after each such
173  * broadcast.
174  *
175  * metadata_write_strategy: Integer code indicating how we will be
176  * writing the metadata. In the first incarnation of
177  * this code, all writes were done from process 0. This
178  * field exists to facilitate experiments with other
179  * strategies.
180  *
181  * dirty_bytes_propagations: This field only exists when the
182  * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
183  *
184  * It is used to track the number of times the cleaned list
185  * has been propagated from process 0 to the other
186  * processes.
187  *
188  * unprotect_dirty_bytes: This field only exists when the
189  * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
190  *
191  * It is used to track the number of dirty bytes created
192  * via unprotect operations since the last time the cleaned
193  * list was propagated.
194  *
195  * unprotect_dirty_bytes_updates: This field only exists when the
196  * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
197  *
198  * It is used to track the number of times dirty bytes have
199  * been created via unprotect operations since the last time
200  * the cleaned list was propagated.
201  *
202  * insert_dirty_bytes: This field only exists when the
203  * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
204  *
205  * It is used to track the number of dirty bytes created
206  * via insert operations since the last time the cleaned
207  * list was propagated.
208  *
209  * insert_dirty_bytes_updates: This field only exists when the
210  * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
211  *
212  * It is used to track the number of times dirty bytes have
213  * been created via insert operations since the last time
214  * the cleaned list was propagated.
215  *
216  * move_dirty_bytes: This field only exists when the
217  * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
218  *
219  * It is used to track the number of dirty bytes created
220  * via move operations since the last time the cleaned
221  * list was propagated.
222  *
223  * move_dirty_bytes_updates: This field only exists when the
224  * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
225  *
226  * It is used to track the number of times dirty bytes have
227  * been created via move operations since the last time
228  * the cleaned list was propagated.
229  *
230  * Things have changed a bit since the following four fields were defined.
231  * If metadata_write_strategy is H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY,
232  * all comments hold as before -- with the caveat that pending further
233  * coding, the process 0 metadata cache is forbidden to flush entries outside
234  * of a sync point.
235  *
236  * However, for different metadata write strategies, these fields are used
237  * only to maintain the correct dirty byte count on process zero -- and in
238  * most if not all cases, this is redundant, as process zero will be barred
239  * from flushing entries outside of a sync point.
240  *
241  * JRM -- 3/16/10
242  *
243  * d_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list
244  * of entries that have been dirtied since the last time they
245  * were listed in a clean entries broadcast. This list is
246  * only maintained by the metadata cache on process 0 -- it
247  * is used to maintain a view of the dirty entries as seen
248  * by the other caches, so as to keep the dirty bytes count
249  * in synchronization with them.
250  *
251  * Thus on process 0, the dirty_bytes count is incremented
252  * only if either
253  *
254  * 1) an entry is inserted in the metadata cache, or
255  *
256  * 2) a previously clean entry is moved, and it does not
257  * already appear in the dirty entry list, or
258  *
259  * 3) a previously clean entry is unprotected with the
260  * dirtied flag set and the entry does not already appear
261  * in the dirty entry list.
262  *
263  * Entries are added to the dirty entry list whenever they cause
264  * the dirty bytes count to be increased. They are removed
265  * when they appear in a clean entries broadcast. Note that
266  * moves must be reflected in the dirty entry list.
267  *
268  * To reiterate, this field is only used on process 0 -- it
269  * should be NULL on all other processes.
270  *
271  * d_slist_len: Integer field containing the number of entries in the
272  * dirty entry list. This field should always contain the
273  * value 0 on all processes other than process 0. It exists
274  * primarily for sanity checking.
275  *
276  * c_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list
277  * of entries that were dirty, have been flushed
278  * to disk since the last clean entries broadcast, and are
279  * still clean. Since only process 0 can write to disk, this
280  * list only exists on process 0.
281  *
282  * In essence, this slist is used to assemble the contents of
283  * the next clean entries broadcast. The list is emptied after
284  * each broadcast.
285  *
286  * c_slist_len: Integer field containing the number of entries in the clean
287  * entries list (*c_slist_ptr). This field should always
288  * contain the value 0 on all processes other than process 0.
289  * It exists primarily for sanity checking.
290  *
291  * The following two fields are used only when metadata_write_strategy
292  * is H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED.
293  *
294  * candidate_slist_ptr: Pointer to an instance of H5SL_t used by process 0
295  * to construct a list of entries to be flushed at this sync
296  * point. This list is then broadcast to the other processes,
297  * which then either flush or mark clean all entries on it.
298  *
299  * candidate_slist_len: Integer field containing the number of entries on the
300  * candidate list. It exists primarily for sanity checking.
301  *
302  * write_done: In the parallel test bed, it is necessary to ensure that
303  * all writes to the server process from cache 0 complete
304  * before it enters the barrier call with the other caches.
305  *
306  * The write_done callback allows t_cache to do this without
307  * requiring an ACK on each write. Since these ACKs greatly
308  * increase the run time on some platforms, this is a
309  * significant optimization.
310  *
311  * This field must be set to NULL when the callback is not
312  * needed.
313  *
314  * Note: This field has been extended for use by all processes
315  * with the addition of support for the distributed
316  * metadata write strategy.
317  * JRM -- 5/9/10
318  *
319  * sync_point_done: In the parallel test bed, it is necessary to verify
320  * that the expected writes, and only the expected writes,
321  * have taken place at the end of each sync point.
322  *
323  * The sync_point_done callback allows t_cache to perform
324  * this verification. The field is set to NULL when the
325  * callback is not needed.
326  *
327  ****************************************************************************/
328 
329 #ifdef H5_HAVE_PARALLEL
330 
331 #define H5AC__H5AC_AUX_T_MAGIC (unsigned)0x00D0A01 /* value stored in H5AC_aux_t.magic to validate pointers */
332 
333 typedef struct H5AC_aux_t /* per-cache auxiliary state for the parallel metadata cache */
334 {
335  uint32_t magic; /* always H5AC__H5AC_AUX_T_MAGIC; used to validate H5AC_aux_t pointers */
336 
337  MPI_Comm mpi_comm; /* communicator associated with the file the cache was created for */
338 
339  int mpi_rank; /* rank of this process within mpi_comm */
340 
341  int mpi_size; /* number of processes in mpi_comm */
342 
343  hbool_t write_permitted; /* whether the cache is permitted to write to file */
344 
345  int32_t dirty_bytes_threshold; /* dirty-byte generation level that triggers a clean-entries broadcast/receive */
346 
347  int32_t dirty_bytes; /* dirty bytes generated since the last clean-entries broadcast; reset to zero after each */
348 
349  int32_t metadata_write_strategy; /* integer code indicating how metadata is written */
350 
351 #if H5AC_DEBUG_DIRTY_BYTES_CREATION
352 
353  int32_t dirty_bytes_propagations; /* times the cleaned list has been propagated from process 0 */
354 
355  int32_t unprotect_dirty_bytes; /* dirty bytes created via unprotect since the last propagation */
356  int32_t unprotect_dirty_bytes_updates; /* number of unprotect operations that created dirty bytes since then */
357 
358  int32_t insert_dirty_bytes; /* dirty bytes created via insert since the last propagation */
359  int32_t insert_dirty_bytes_updates; /* number of insert operations that created dirty bytes since then */
360 
361  int32_t move_dirty_bytes; /* dirty bytes created via move since the last propagation */
362  int32_t move_dirty_bytes_updates; /* number of move operations that created dirty bytes since then */
363 
364 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
365 
366  H5SL_t * d_slist_ptr; /* process 0 only: entries dirtied since last listed in a clean-entries broadcast; NULL elsewhere */
367 
368  int32_t d_slist_len; /* number of entries in the dirty entry list; 0 on all processes other than 0 */
369 
370  H5SL_t * c_slist_ptr; /* process 0 only: entries flushed since the last broadcast that are still clean */
371 
372  int32_t c_slist_len; /* number of entries in the clean entries list; 0 on all processes other than 0 */
373 
374  H5SL_t * candidate_slist_ptr; /* distributed write strategy only: entries to be flushed at this sync point */
375 
376  int32_t candidate_slist_len; /* number of entries on the candidate list; mainly for sanity checking */
377 
378  void (* write_done)(void); /* parallel test-bed callback; must be NULL when not needed */
379 
380  void (* sync_point_done)(int num_writes,
381  haddr_t * written_entries_tbl); /* parallel test-bed callback used to verify sync-point writes; NULL when not needed */
382 
383 } H5AC_aux_t; /* struct H5AC_aux_t */
384 
385 #endif /* H5_HAVE_PARALLEL */
386 
387 #endif /* _H5ACpkg_H */
388 
unsigned int hbool_t
Definition: H5public.h:142
uint64_t haddr_t
Definition: H5public.h:182

MISR Toolkit - Copyright © 2005 - 2020 Jet Propulsion Laboratory
Generated on Fri Jun 19 2020 22:49:52