@@ -32,38 +32,96 @@ static unsigned int hash_name(const char *name, int namelen)
32
32
return hash ;
33
33
}
34
34
35
- static void hash_index_entry_directories (struct index_state * istate , struct cache_entry * ce )
35
/*
 * Bookkeeping record for one directory tracked in index_state.dir_hash.
 *
 * The record does not own a copy of the directory name; it borrows it
 * from a cache_entry that lives in that directory.
 */
struct dir_entry {
	/* next record chained in the same hash bucket */
	struct dir_entry *next;
	/* record of the enclosing directory, NULL for a top-level directory */
	struct dir_entry *parent;
	/* the first 'namelen' bytes of ce->name spell this directory's name */
	struct cache_entry *ce;
	/* reference count adjusted by add_dir_entry() / remove_dir_entry() */
	int nr;
	/* length of the directory name, including its closing separator */
	unsigned int namelen;
};
42
+
43
+ static struct dir_entry * find_dir_entry (struct index_state * istate ,
44
+ const char * name , unsigned int namelen )
45
+ {
46
+ unsigned int hash = hash_name (name , namelen );
47
+ struct dir_entry * dir ;
48
+
49
+ for (dir = lookup_hash (hash , & istate -> dir_hash ); dir ; dir = dir -> next )
50
+ if (dir -> namelen == namelen &&
51
+ !strncasecmp (dir -> ce -> name , name , namelen ))
52
+ return dir ;
53
+ return NULL ;
54
+ }
55
+
56
+ static struct dir_entry * hash_dir_entry (struct index_state * istate ,
57
+ struct cache_entry * ce , int namelen )
36
58
{
37
59
/*
38
60
* Throw each directory component in the hash for quick lookup
39
61
* during a git status. Directory components are stored with their
40
62
* closing slash. Despite submodules being a directory, they never
41
63
* reach this point, because they are stored without a closing slash
42
- * in the cache .
64
+ * in index_state.name_hash (as ordinary cache_entries) .
43
65
*
44
- * Note that the cache_entry stored with the directory does not
45
- * represent the directory itself. It is a pointer to an existing
46
- * filename, and its only purpose is to represent existence of the
47
- * directory in the cache. It is very possible multiple directory
48
- * hash entries may point to the same cache_entry.
66
+ * Note that the cache_entry stored with the dir_entry merely
67
+ * supplies the name of the directory (up to dir_entry.namelen). We
68
+ * track the number of 'active' files in a directory in dir_entry.nr,
69
+ * so we can tell if the directory is still relevant, e.g. for git
70
+ * status. However, if cache_entries are removed, we cannot pinpoint
71
+ * an exact cache_entry that's still active. It is very possible that
72
+ * multiple dir_entries point to the same cache_entry.
49
73
*/
50
- unsigned int hash ;
51
- void * * pos ;
74
+ struct dir_entry * dir ;
75
+
76
+ /* get length of parent directory */
77
+ while (namelen > 0 && !is_dir_sep (ce -> name [namelen - 1 ]))
78
+ namelen -- ;
79
+ if (namelen <= 0 )
80
+ return NULL ;
81
+
82
+ /* lookup existing entry for that directory */
83
+ dir = find_dir_entry (istate , ce -> name , namelen );
84
+ if (!dir ) {
85
+ /* not found, create it and add to hash table */
86
+ void * * pdir ;
87
+ unsigned int hash = hash_name (ce -> name , namelen );
52
88
53
- const char * ptr = ce -> name ;
54
- while (* ptr ) {
55
- while (* ptr && * ptr != '/' )
56
- ++ ptr ;
57
- if (* ptr == '/' ) {
58
- ++ ptr ;
59
- hash = hash_name (ce -> name , ptr - ce -> name );
60
- pos = insert_hash (hash , ce , & istate -> name_hash );
61
- if (pos ) {
62
- ce -> dir_next = * pos ;
63
- * pos = ce ;
64
- }
89
+ dir = xcalloc (1 , sizeof (struct dir_entry ));
90
+ dir -> namelen = namelen ;
91
+ dir -> ce = ce ;
92
+
93
+ pdir = insert_hash (hash , dir , & istate -> dir_hash );
94
+ if (pdir ) {
95
+ dir -> next = * pdir ;
96
+ * pdir = dir ;
65
97
}
98
+
99
+ /* recursively add missing parent directories */
100
+ dir -> parent = hash_dir_entry (istate , ce , namelen - 1 );
66
101
}
102
+ return dir ;
103
+ }
104
+
105
+ static void add_dir_entry (struct index_state * istate , struct cache_entry * ce )
106
+ {
107
+ /* Add reference to the directory entry (and parents if 0). */
108
+ struct dir_entry * dir = hash_dir_entry (istate , ce , ce_namelen (ce ));
109
+ while (dir && !(dir -> nr ++ ))
110
+ dir = dir -> parent ;
111
+ }
112
+
113
+ static void remove_dir_entry (struct index_state * istate , struct cache_entry * ce )
114
+ {
115
+ /*
116
+ * Release reference to the directory entry (and parents if 0).
117
+ *
118
+ * Note: we do not remove / free the entry because there's no
119
+ * hash.[ch]::remove_hash and dir->next may point to other entries
120
+ * that are still valid, so we must not free the memory.
121
+ */
122
+ struct dir_entry * dir = hash_dir_entry (istate , ce , ce_namelen (ce ));
123
+ while (dir && dir -> nr && !(-- dir -> nr ))
124
+ dir = dir -> parent ;
67
125
}
68
126
69
127
static void hash_index_entry (struct index_state * istate , struct cache_entry * ce )
@@ -74,16 +132,16 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
74
132
if (ce -> ce_flags & CE_HASHED )
75
133
return ;
76
134
ce -> ce_flags |= CE_HASHED ;
77
- ce -> next = ce -> dir_next = NULL ;
135
+ ce -> next = NULL ;
78
136
hash = hash_name (ce -> name , ce_namelen (ce ));
79
137
pos = insert_hash (hash , ce , & istate -> name_hash );
80
138
if (pos ) {
81
139
ce -> next = * pos ;
82
140
* pos = ce ;
83
141
}
84
142
85
- if (ignore_case )
86
- hash_index_entry_directories (istate , ce );
143
+ if (ignore_case && !( ce -> ce_flags & CE_UNHASHED ) )
144
+ add_dir_entry (istate , ce );
87
145
}
88
146
89
147
static void lazy_init_name_hash (struct index_state * istate )
@@ -99,11 +157,33 @@ static void lazy_init_name_hash(struct index_state *istate)
99
157
100
158
void add_name_hash (struct index_state * istate , struct cache_entry * ce )
101
159
{
160
+ /* if already hashed, add reference to directory entries */
161
+ if (ignore_case && (ce -> ce_flags & CE_STATE_MASK ) == CE_STATE_MASK )
162
+ add_dir_entry (istate , ce );
163
+
102
164
ce -> ce_flags &= ~CE_UNHASHED ;
103
165
if (istate -> name_hash_initialized )
104
166
hash_index_entry (istate , ce );
105
167
}
106
168
169
+ /*
170
+ * We don't actually *remove* it, we can just mark it invalid so that
171
+ * we won't find it in lookups.
172
+ *
173
+ * Not only would we have to search the lists (simple enough), but
174
+ * we'd also have to rehash other hash buckets in case this makes the
175
+ * hash bucket empty (common). So it's much better to just mark
176
+ * it.
177
+ */
178
+ void remove_name_hash (struct index_state * istate , struct cache_entry * ce )
179
+ {
180
+ /* if already hashed, release reference to directory entries */
181
+ if (ignore_case && (ce -> ce_flags & CE_STATE_MASK ) == CE_HASHED )
182
+ remove_dir_entry (istate , ce );
183
+
184
+ ce -> ce_flags |= CE_UNHASHED ;
185
+ }
186
+
107
187
static int slow_same_name (const char * name1 , int len1 , const char * name2 , int len2 )
108
188
{
109
189
if (len1 != len2 )
@@ -137,18 +217,7 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen
137
217
if (!icase )
138
218
return 0 ;
139
219
140
- /*
141
- * If the entry we're comparing is a filename (no trailing slash), then compare
142
- * the lengths exactly.
143
- */
144
- if (name [namelen - 1 ] != '/' )
145
- return slow_same_name (name , namelen , ce -> name , len );
146
-
147
- /*
148
- * For a directory, we point to an arbitrary cache_entry filename. Just
149
- * make sure the directory portion matches.
150
- */
151
- return slow_same_name (name , namelen , ce -> name , namelen < len ? namelen : len );
220
+ return slow_same_name (name , namelen , ce -> name , len );
152
221
}
153
222
154
223
struct cache_entry * index_name_exists (struct index_state * istate , const char * name , int namelen , int icase )
@@ -164,27 +233,54 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na
164
233
if (same_name (ce , name , namelen , icase ))
165
234
return ce ;
166
235
}
167
- if (icase && name [namelen - 1 ] == '/' )
168
- ce = ce -> dir_next ;
169
- else
170
- ce = ce -> next ;
236
+ ce = ce -> next ;
171
237
}
172
238
173
239
/*
174
- * Might be a submodule. Despite submodules being directories,
240
+ * When looking for a directory (trailing '/'), it might be a
241
+ * submodule or a directory. Despite submodules being directories,
175
242
* they are stored in the name hash without a closing slash.
176
- * When ignore_case is 1, directories are stored in the name hash
177
- * with their closing slash.
243
+ * When ignore_case is 1, directories are stored in a separate hash
244
+ * table *with* their closing slash.
178
245
*
179
246
* The side effect of this storage technique is that we need to
247
+ * lookup the directory in a separate hash table, and if not found
180
248
* remove the slash from name and perform the lookup again without
181
249
* the slash. If a match is made, S_ISGITLINK(ce->mode) will be
182
250
* true.
183
251
*/
184
252
if (icase && name [namelen - 1 ] == '/' ) {
253
+ struct dir_entry * dir = find_dir_entry (istate , name , namelen );
254
+ if (dir && dir -> nr )
255
+ return dir -> ce ;
256
+
185
257
ce = index_name_exists (istate , name , namelen - 1 , icase );
186
258
if (ce && S_ISGITLINK (ce -> ce_mode ))
187
259
return ce ;
188
260
}
189
261
return NULL ;
190
262
}
263
+
264
+ static int free_dir_entry (void * entry , void * unused )
265
+ {
266
+ struct dir_entry * dir = entry ;
267
+ while (dir ) {
268
+ struct dir_entry * next = dir -> next ;
269
+ free (dir );
270
+ dir = next ;
271
+ }
272
+ return 0 ;
273
+ }
274
+
275
+ void free_name_hash (struct index_state * istate )
276
+ {
277
+ if (!istate -> name_hash_initialized )
278
+ return ;
279
+ istate -> name_hash_initialized = 0 ;
280
+ if (ignore_case )
281
+ /* free directory entries */
282
+ for_each_hash (& istate -> dir_hash , free_dir_entry , NULL );
283
+
284
+ free_hash (& istate -> name_hash );
285
+ free_hash (& istate -> dir_hash );
286
+ }
0 commit comments