/[dtapublic]/projs/dtats/trunk/projs/20161007_dedup/qdedup.c
ViewVC logotype

Contents of /projs/dtats/trunk/projs/20161007_dedup/qdedup.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 75 - (show annotations) (download)
Sat Nov 5 18:40:38 2016 UTC (8 years, 1 month ago) by dashley
Original Path: projs/trunk/projs/20161007_dedup/qdedup.c
File MIME type: text/plain
File size: 28351 byte(s)
Edits.
1 //----------------------------------------------------------------------------------------------------
2 //qdedup.c
3 //----------------------------------------------------------------------------------------------------
4 //Quick and dirty program to eliminate duplicates from a file tree. A file containing the SHA512
5 //hashes of all the files to be considered must already exist, and must be regenerated each time the
6 //underlying files are deleted/added/modified, which means the file must be regenerated after each run
7 //of qdedup. (WARNING: IF YOU DO NOT REGENERATE THE FILE AFTER EACH RUN OF qdedup, YOU WILL
8 //PROBABLY DESTROY DATA. THE MECHANISM WOULD BE THAT THE SHA512 MANIFEST IMPLIES THAT DUPLICATES
9 //EXIST WHEN THEY NO LONGER DO, SO qdedup WILL ERRONEOUSLY DELETE THE LAST COPIES OF FILES.) The
10 //program will eliminate duplicates within a single specified directory or outside a single specified
11 //directory.
12 //
13 //This program will compile and run only on *nix systems and under Cygwin on Windows systems.
14 //----------------------------------------------------------------------------------------------------
15 //Copyright David T. Ashley (dashley@gmail.com), 2016.
16 //----------------------------------------------------------------------------------------------------
17 //Provided under the MIT LICENSE, reproduced immediately below.
18 //----------------------------------------------------------------------------------------------------
19 //Permission is hereby granted, free of charge, to any person obtaining a copy of
20 //this software and associated documentation files (the "Software"), to deal in the
21 //Software without restriction, including without limitation the rights to use,
22 //copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
23 //Software, and to permit persons to whom the Software is furnished to do so,
24 //subject to the following conditions:
25 //
26 //The above copyright notice and this permission notice shall be included in all
27 //copies or substantial portions of the Software.
28 //
29 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 //SOFTWARE.
36 //----------------------------------------------------------------------------------------------------
37 //All paths in the SHA512 file must be absolute or must be relative to the current working directory
38 //at the time this program is run.
39 //
40 //The recommended method to generate the SHA512 file is using the "-exec" option of the "find"
41 //command, i.e.
42 //
43 // find target_directory -type f -exec sha512sum {} \; >sha512sums.txt
44 //
45 //If any files are deleted by the program, a new SHA512 file must be generated before the program is
46 //run again to delete files. The reason for this restriction is that the program will never knowingly
47 //delete the last copy of a file. If the SHA512 file contains the digests of files that no longer
48 //exist, the program may unknowingly delete the last copies of files (because it believes based on
49 //the SHA512 file that other copies exist when in fact they do not).
50 //
51 //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
52 //
53 //This program is designed to compile and run under Cygwin or *nix only.
54 //
55 //Usage:
56 // qdedup
57 // Prints help information and exits.
58 // qdedup ndups <sha512file>
59 // Prints statistics about the number of duplicates in <sha512file>.
60 // qdedup filterdups <sha512file>
61 // Analyzes duplicates and prints the filenames of groups of duplicates. The output is designed
62 // for hand analysis so that insight can be gained into what duplicates exist and where they
63 // are located.
64 // qdedup dedup_preserve_inside <sha512file> <path>
65 // For each group of duplicates that exists, preserves the duplicates that exist within path
66 // and removes all others. If no copies of the duplicate exist within path, no copies of the
67 // duplicate will be removed.
68 // qdedup dryrun_preserve_inside <sha512file> <path>
69 // Exactly like "dedup_preserve_inside", except that no files will be deleted. Text will be
70 // output to explain what would be deleted by "dedup_preserve_inside".
71 // qdedup dedup_nopath <sha512file>
72 // For each group of duplicates that exists, preserves only the first (the one with lowest
73 // sort-order filename).
74 // qdedup dryrun_nopath <sha512file>
75 // Exactly like "dedup_nopath", except that no files will be deleted. Text will be
76 // output to explain what would be deleted by "dedup_nopath".
77 // qdedup dedup_preserve_outside <sha512file> <path>
78 // For each group of duplicates that exists, deletes duplicates only from within the specified
79 // path. If any duplicates do not have at least one copy within <path> no instances of the
80 // duplicate are deleted.
81 // qdedup dryrun_preserve_outside <sha512file> <path>
82 // Exactly like "dedup_preserve_outside", except that no files will be deleted. Text will be
83 // output to explain what would be deleted by "dedup_preserve_outside".
84 //----------------------------------------------------------------------------------------------------
85 #include <math.h>
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <string.h>
89 #include <time.h>
90 #include <unistd.h>
91 //----------------------------------------------------------------------------------------------------
92 #define LINELEN (78) //Number of printable characters in a line.
93 #define MAXLINELEN (2000) //The maximum number of characters that may be in a line of the
94 //SHA512 input file. This count includes the \0 terminator, so only
95 //this value minus 1 characters may be in a line.
96 #define UNLINKPAUSETIME (0.1) //Number of seconds to pause between file unlinks (deletions). This
97 //is designed to give the user time to abort the program if desired
98 //before catastrophic quantities of files are deleted.
99 //----------------------------------------------------------------------------------------------------
//Record for one line of the SHA512 file: the hash in its character (hexadecimal text) form,
//plus the filename given on that line.
typedef struct
{
   //Hash text: 512 bits / 4 bits per character = 128 characters, plus 1 character for the
   //zero terminator.
   char hash[128 + 1];
   //Filename as specified in the file; storage is obtained from the malloc() family.
   char *fname;
} tFileHashRecord;
109 //----------------------------------------------------------------------------------------------------
110 //----------------------------------------------------------------------------------------------------
111 //----- CHARACTER CLASSIFICATION FUNCTIONS ---------------------------------------------------------
112 //----------------------------------------------------------------------------------------------------
113 //----------------------------------------------------------------------------------------------------
//Classifies a character of the hash field.
//
//Returns 1 if c is a decimal digit or one of the lowercase letters 'a' through 'f', and 0 for
//every other character. Uppercase 'A' through 'F' are not accepted.
int is_valid_hash_char(char c)
{
   static const char hex_digits[] = "0123456789abcdef";

   //Only the 16 digit characters are searched; the literal's zero terminator is excluded so
   //that c == '\0' is rejected.
   return(memchr(hex_digits, c, sizeof(hex_digits) - 1) != NULL);
}
142 //----------------------------------------------------------------------------------------------------
//Classifies a character as belonging to a newline sequence.
//
//Returns 1 if c has the value 10 (line feed) or 13 (carriage return), and 0 otherwise.
int is_newline_sequence_char(char c)
{
   const char line_feed       = 10;
   const char carriage_return = 13;

   return((c == line_feed) || (c == carriage_return));
}
157
158 //----------------------------------------------------------------------------------------------------
159 //----------------------------------------------------------------------------------------------------
160 //----- FORMATTED OUTPUT FUNCTIONS -----------------------------------------------------------------
161 //----------------------------------------------------------------------------------------------------
162 //----------------------------------------------------------------------------------------------------
//Writes the character c to the stream s exactly n times. Nothing is written when n is 0.
//
void stream_rep_char(FILE *s, char c, unsigned n)
{
   unsigned written;

   for (written = 0; written < n; written++)
   {
      fputc(c, s);
   }
}
172 //----------------------------------------------------------------------------------------------------
173 //Prints a horizontal line to a stream, including the newline.
174 //
175 void stream_hline(FILE *s)
176 {
177 stream_rep_char(s, '-', LINELEN);
178 fprintf(s, "\n");
179 }
180 //----------------------------------------------------------------------------------------------------
//Writes a horizontal rule (LINELEN '-' characters followed by a newline) to stdout.
//
void stdout_hline(void)
{
   //Same output as the stream version, directed at stdout.
   stream_hline(stdout);
}
188 //----------------------------------------------------------------------------------------------------
189 //----------------------------------------------------------------------------------------------------
190 //----- FATAL ERROR FUNCTIONS ----------------------------------------------------------------------
191 //----------------------------------------------------------------------------------------------------
192 //----------------------------------------------------------------------------------------------------
//Reports an unrecoverable error and terminates the program. This function does not return.
//
//The report is written to stdout, framed by horizontal rules, and the process then exits
//with status 1.
//
//   desc : text describing the error.
//   file : source file name to report (callers pass __FILE__).
//   line : source line number to report (callers pass __LINE__).
void fatal(const char *desc, const char *file, unsigned line)
{
   stdout_hline();
   fprintf(stdout, "Fatal error: %s\n", desc);
   fprintf(stdout, "Source file: %s\n", file);
   fprintf(stdout, "Line : %u\n", line);
   stdout_hline();

   exit(1);
}
204 //----------------------------------------------------------------------------------------------------
205 //----------------------------------------------------------------------------------------------------
206 //----- MEMORY ALLOCATION WRAPPERS -----------------------------------------------------------------
207 //----------------------------------------------------------------------------------------------------
208 //----------------------------------------------------------------------------------------------------
//malloc() wrapper that never returns NULL.
//
//   nbytes : number of bytes to allocate; a request for 0 bytes is a fatal error.
//
//Returns a pointer to nbytes bytes of zero-filled storage. Running out of memory is a fatal
//error (the program terminates inside fatal()), so callers need not check the result.
void *w_malloc(size_t nbytes)
{
   void *rv;

   if (!nbytes)
   {
      fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
   }

   rv = malloc(nbytes);

   if (!rv)
   {
      fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
   }

   //Zero out, just for consistency.
   memset(rv, 0, nbytes);

   //The allocation must be handed back explicitly; falling off the end of a non-void
   //function leaves the caller with an indeterminate pointer.
   return(rv);
}
229 //----------------------------------------------------------------------------------------------------
//realloc() wrapper that never returns NULL.
//
//   p : existing allocation to resize; a NULL pointer is a fatal error (use w_malloc() for
//       the first allocation).
//   n : new size in bytes; a request for 0 bytes is a fatal error.
//
//Returns the (possibly moved) allocation. The caller must use the returned pointer in place
//of p. Running out of memory is a fatal error (the program terminates inside fatal()). Any
//bytes added beyond the old size are not initialized.
void *w_realloc(void *p, size_t n)
{
   void *rv;

   if (!n)
   {
      fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
   }

   if (!p)
   {
      fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
   }

   rv = realloc(p, n);

   if (!rv)
   {
      fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
   }

   //The new address must be handed back explicitly; falling off the end of a non-void
   //function leaves the caller with an indeterminate pointer.
   return(rv);
}
252 //----------------------------------------------------------------------------------------------------
253 //----------------------------------------------------------------------------------------------------
254 //----- SLEEP FUNCTIONS ----------------------------------------------------------------------------
255 //----------------------------------------------------------------------------------------------------
256 //----------------------------------------------------------------------------------------------------
//Suspends execution for a time given in (possibly fractional) seconds.
//
//   seconds : time to sleep. A negative value, or a value above 3600, is a fatal error.
//
//The result of nanosleep() is not examined, so a sleep that ends early is not resumed.
void w_sleep(double seconds)
{
   struct timespec delay;
   double whole_seconds;

   if (seconds < 0)
   {
      fatal("Sleep for negative time request.", __FILE__, __LINE__);
   }
   else if (seconds > 3600)
   {
      fatal("Sleep for too long request.", __FILE__, __LINE__);
   }

   //Split into whole seconds and the fractional remainder expressed in nanoseconds.
   whole_seconds = floor(seconds);
   delay.tv_sec  = whole_seconds;
   delay.tv_nsec = (seconds - whole_seconds) * 1E9;

   nanosleep(&delay, NULL);
}
276 //----------------------------------------------------------------------------------------------------
277 //----------------------------------------------------------------------------------------------------
278 //----- SHA512 FIELD READ FUNCTIONS ----------------------------------------------------------------
279 //----------------------------------------------------------------------------------------------------
280 //----------------------------------------------------------------------------------------------------
281 //These functions read in an individual field of a standard SHA512 file generated using application
282 //of the standard sha512sum program.
283 //
284 //*rcode = 1, success.
285 // 0, legal end of file, record assigned.
286 void get_sha512file_line(FILE *s, int *rcode, tFileHashRecord *hash_rec)
287 {
288 unsigned bidx;
289 unsigned nchars;
290 int ic;
291 int exitflag;
292 int eoffound;
293 int eolfound;
294 char c;
295 char buf[MAXLINELEN];
296
297 //Zero out the buffer. This handles string termination automatically.
298 memset(buf, 0, sizeof(buf));
299
300 //Read characters into the buffer until either hit EOF, newline, or can't
301 //fill the buffer any longer.
302 eoffound = 0;
303 eolfound = 0;
304 exitflag = 0;
305 bidx = 0;
306 do
307 {
308 ic = fgetc(s);
309 c = ic;
310
311 if (ic == EOF)
312 {
313 eoffound = 1;
314 eolfound = 0;
315 nchars = bidx;
316 exitflag = 1;
317 }
318 else if (is_newline_sequence_char(c))
319 {
320 eoffound = 0;
321 eolfound = 1;
322 nchars = bidx;
323 exitflag = 1;
324 }
325 else if (bidx >= (MAXLINELEN - 1))
326 {
327 fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
328 }
329 else
330 {
331 buf[bidx] = c;
332 bidx++;
333 exitflag = 0;
334 }
335 } while(! exitflag);
336
337 //If we encountered a newline, inch past it. We may encounter an EOF.
338 if (eolfound)
339 {
340 exitflag = 0;
341 do
342 {
343 ic = fgetc(s);
344 c = ic;
345
346 if (ic == EOF)
347 {
348 eoffound = 1;
349 eolfound = 0;
350 exitflag = 1;
351 }
352 else if (is_newline_sequence_char(c))
353 {
354 exitflag = 0;
355 }
356 else
357 {
358 //We hit the next line. Put the character back.
359 eoffound = 0;
360 eolfound = 1;
361 ungetc(ic, s);
362 exitflag = 1;
363 }
364 } while(! exitflag);
365 }
366
367 //For better or worse, we have a \0-terminated line in the buffer.
368 //
369 //Zero the caller's area. This takes care of the hash terminator as well.
370 memset(hash_rec, 0, sizeof(*hash_rec));
371
372 //Ensure that we have at least 128 characters, and they are all hex characters.
373 //Otherwise, we can't proceed.
374 if (nchars < 128)
375 {
376 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
377 }
378 else
379 {
380 for (bidx = 0; bidx < 128; bidx++)
381 {
382 if (! is_valid_hash_char(buf[bidx]))
383 {
384 fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
385 }
386 }
387 }
388
389 //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
390 if (nchars < 130)
391 {
392 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
393 }
394 else if (buf[128] != ' ')
395 {
396 fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
397 }
398 // else if (buf[129] != '*')
399 // {
400 // fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
401 // }
402 else if (buf[129] != ' ')
403 {
404 //130th character is ' '. Need to figure out why sometimes space and sometimes '*'.
405 fatal("130th hash line character must be \" \".", __FILE__, __LINE__);
406 }
407
408 //There must be a 131'st character. Beyond that, we can't qualify, because filenames may
409 //have odd characters and may be of any length.
410 if (nchars < 131)
411 {
412 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
413 }
414
415 //Copy the hash to the caller's area. The terminator has already been inserted.
416 memcpy(&(hash_rec->hash[0]), buf, 128);
417
418 //Allocate space for the filename.
419 hash_rec->fname = w_malloc(strlen(buf+130) + 1);
420
421 //Make the copy.
422 strcpy(hash_rec->fname, buf+130);
423
424 if (eoffound)
425 *rcode = 0;
426 else
427 *rcode = 1;
428 }
429 //----------------------------------------------------------------------------------------------------
430 void parseinputfile(tFileHashRecord **parsed_recs, unsigned *count, char *fname)
431 {
432 FILE *s;
433 int rcode;
434
435 //Try to open the file for reading. Inability is a failure.
436 s = fopen(fname, "r");
437 if (!s)
438 {
439 fatal("Hash file open failure.", __FILE__, __LINE__);
440 }
441
442 //Start off with a count of 0 and a NULL pointer.
443 *count = 0;
444 *parsed_recs = NULL;
445
446 do
447 {
448 //For the first time, allocate space for one record. Beyond that,
449 //expand it.
450 if (! *parsed_recs)
451 {
452 *parsed_recs = w_malloc(sizeof(tFileHashRecord));
453 }
454 else
455 {
456 *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
457 }
458
459 //Parse and fill in the space.
460 get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
461
462 //We now have one more.
463 (*count)++;
464 } while(rcode == 1);
465
466 //Try to close the file. Inability is a failure.
467 if (fclose(s))
468 {
469 fatal("Hash file close failure.", __FILE__, __LINE__);
470 }
471 }
472 //----------------------------------------------------------------------------------------------------
473 int sortcmpascendinghash(const void *p0_in, const void *p1_in)
474 {
475 const tFileHashRecord *p0, *p1;
476
477 p0 = p0_in;
478 p1 = p1_in;
479
480 return(strcmp(p0->hash, p1->hash));
481 }
482
483 //----------------------------------------------------------------------------------------------------
484 void sortinternaldsbyhash(tFileHashRecord *parsed_recs, unsigned count)
485 {
486 qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
487 }
488 //----------------------------------------------------------------------------------------------------
489 int sortcmpascendingfname(const void *p0_in, const void *p1_in)
490 {
491 const tFileHashRecord *p0, *p1;
492
493 p0 = p0_in;
494 p1 = p1_in;
495
496 return(strcmp(p0->fname, p1->fname));
497 }
498 //----------------------------------------------------------------------------------------------------
499 //This sort has to be run after the hash sort. Within groups of identical hashes, it sorts by
500 //ascending filename.
501 void sortinternalgroupfname(tFileHashRecord *parsed_recs, unsigned count)
502 {
503 unsigned ui;
504 unsigned i_group_min, i_group_max;
505
506 if (! count)
507 return;
508
509 i_group_min = 0;
510 i_group_max = 0;
511
512 do
513 {
514 //Advance i_group_max to the end of the group of duplicates.
515 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
516 {
517 i_group_max++;
518 }
519
520 if (i_group_min != i_group_max)
521 {
522 //Sort the internal group.
523 qsort(parsed_recs + i_group_min,
524 i_group_max - i_group_min + 1,
525 sizeof(tFileHashRecord),
526 sortcmpascendingfname);
527 }
528
529 //On to the next group.
530 i_group_max++;
531 i_group_min = i_group_max;
532
533 } while (i_group_max < (count - 1));
534 }
535 //----------------------------------------------------------------------------------------------------
536 void printsinglerecord(tFileHashRecord *rec, unsigned elno)
537 {
538 printf("[%9u]\n", elno);
539 printf("Hash : %s\n", rec->hash);
540 printf("Filename : %s\n", rec->fname);
541 stdout_hline();
542 }
543 //----------------------------------------------------------------------------------------------------
544 void printinternalds(tFileHashRecord *parsed_recs, unsigned count)
545 {
546 unsigned i;
547
548 for (i=0; i<count; i++)
549 {
550 printsinglerecord(parsed_recs + i, i);
551 }
552 }
553 //----------------------------------------------------------------------------------------------------
554 void gather_dup_stats(tFileHashRecord *parsed_recs, unsigned count, unsigned *out_num_dups, unsigned *out_cumulative_dups)
555 {
556 unsigned i_group_min, i_group_max;
557
558 *out_num_dups = 0;
559 *out_cumulative_dups = 0;
560
561 if (! count)
562 return;
563
564 i_group_min = 0;
565 i_group_max = 0;
566
567 do
568 {
569 //Advance i_group_max to the end of the group of duplicates.
570 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
571 {
572 i_group_max++;
573 }
574
575 //Log the findings.
576 if (i_group_min != i_group_max)
577 {
578 (*out_num_dups)++;
579 (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
580 }
581
582 //On to the next group.
583 i_group_max++;
584 i_group_min = i_group_max;
585
586 } while (i_group_max < (count - 1));
587 }
588 //----------------------------------------------------------------------------------------------------
589 void option_dups(char *fname)
590 {
591 tFileHashRecord *parsed_recs;
592 unsigned count, num_dups, cumulative_dups;
593
594 parseinputfile(&parsed_recs, &count, fname);
595 //printf("%u records parsed.\n", count);
596 sortinternaldsbyhash(parsed_recs, count);
597 sortinternalgroupfname(parsed_recs, count);
598 printinternalds(parsed_recs, count);
599 stdout_hline();
600 gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
601 printf("Number of duplicated files : %u\n", num_dups);
602 if (num_dups)
603 {
604 printf("Average number of duplicates: %.2f\n", (double)cumulative_dups/(double)num_dups);
605 }
606 }
607 //----------------------------------------------------------------------------------------------------
608 void option_filterdups(char *fname)
609 {
610 tFileHashRecord *parsed_recs;
611 unsigned dupgroup;
612 unsigned count;
613 unsigned ui;
614 unsigned i_group_min, i_group_max;
615
616 parseinputfile(&parsed_recs, &count, fname);
617 //printf("%u records parsed.\n", count);
618 sortinternaldsbyhash(parsed_recs, count);
619 sortinternalgroupfname(parsed_recs, count);
620
621 if (! count)
622 return;
623
624 dupgroup = 0;
625 i_group_min = 0;
626 i_group_max = 0;
627
628 do
629 {
630 //Advance i_group_max to the end of the group of duplicates.
631 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
632 {
633 i_group_max++;
634 }
635
636 //Print the findings.
637 if (i_group_min != i_group_max)
638 {
639 printf("Duplicate group %u:\n", dupgroup);
640 for (ui = i_group_min; ui <= i_group_max; ui++)
641 {
642 printf("%s\n", parsed_recs[ui].fname);
643 }
644
645 dupgroup++;
646
647 stdout_hline();
648 }
649
650 //On to the next group.
651 i_group_max++;
652 i_group_min = i_group_max;
653
654 } while (i_group_max < (count - 1));
655 }
656 //----------------------------------------------------------------------------------------------------
//Tests whether a filename lies within a path, by plain string comparison.
//
//   fname : filename to test; a zero-length filename is a fatal error.
//   path  : path to test against; it must be non-empty and end with '/', otherwise the call
//           is a fatal error.
//
//Returns 1 if fname is longer than path and begins with exactly the characters of path, and
//0 otherwise. No normalization is performed, so different spellings of the same location
//(for example with and without a leading "./") do not match each other.
int is_path_member(const char *fname, const char *path)
{
   size_t fname_len = strlen(fname);
   size_t path_len  = strlen(path);

   if (fname_len == 0)
   {
      fatal("Zero-length filename.", __FILE__, __LINE__);
   }
   else if (path_len == 0)
   {
      fatal("Zero-length path.", __FILE__, __LINE__);
   }
   else if (path[path_len - 1] != '/')
   {
      fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
   }

   //A filename that is not longer than the path cannot be inside it; otherwise the path
   //must be a prefix of the filename.
   return((fname_len > path_len) && (memcmp(fname, path, path_len) == 0));
}
686 //----------------------------------------------------------------------------------------------------
687 void option_dedup(char *fname, char *path, int may_delete, double pause_time)
688 {
689 tFileHashRecord *parsed_recs;
690 unsigned dupgroup;
691 unsigned count;
692 unsigned ui;
693 unsigned within_path;
694 unsigned i_group_min, i_group_max;
695
696 parseinputfile(&parsed_recs, &count, fname);
697 //printf("%u records parsed.\n", count);
698 sortinternaldsbyhash(parsed_recs, count);
699 sortinternalgroupfname(parsed_recs, count);
700
701 if (! count)
702 return;
703
704 dupgroup = 0;
705 i_group_min = 0;
706 i_group_max = 0;
707
708 do
709 {
710 //Advance i_group_max to the end of the group of duplicates.
711 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
712 {
713 i_group_max++;
714 }
715
716 //If this is a group of duplicates.
717 if (i_group_min != i_group_max)
718 {
719 //Print the findings.
720 printf("Duplicate group %u:\n", dupgroup);
721 for (ui = i_group_min; ui <= i_group_max; ui++)
722 {
723 printf("%s\n", parsed_recs[ui].fname);
724 }
725
726 dupgroup++;
727
728 stdout_hline();
729
730 //Count how many of the group of duplicates are within the supplied path.
731 within_path = 0;
732 for (ui = i_group_min; ui <= i_group_max; ui++)
733 {
734 if (is_path_member(parsed_recs[ui].fname, path))
735 {
736 within_path++;
737 }
738 }
739
740 //We have to take different actions based on whether we do or don't have any within path.
741 //If we don't have any, we may delete nothing.
742 if (! within_path)
743 {
744 printf("None of these duplicates in path--taking no action.\n");
745 //stdout_hline();
746 }
747 else
748 {
749 for (ui = i_group_min; ui <= i_group_max; ui++)
750 {
751 if (is_path_member(parsed_recs[ui].fname, path))
752 {
753 printf("Not deleting: %s\n", parsed_recs[ui].fname);
754 }
755 else
756 {
757 printf("Deleting : %s\n", parsed_recs[ui].fname);
758 if (may_delete)
759 {
760 if (! unlink(parsed_recs[ui].fname))
761 {
762 printf(" File deleted (unlinked) successfully.\n");
763 }
764 else
765 {
766 printf(" Failure attempting to delete (unlink) file.\n");
767 }
768 }
769 else
770 {
771 printf(" Dry run only.\n");
772 }
773 }
774
775 //w_sleep(pause_time);
776 }
777 }
778
779 stdout_hline();
780 }
781
782 //On to the next group.
783 i_group_max++;
784 i_group_min = i_group_max;
785
786 } while (i_group_max < (count - 1));
787 }
788 //----------------------------------------------------------------------------------------------------
789 int main(int argc, char* argv[])
790 {
791 stdout_hline();
792 printf("Execution begins.\n");
793 stdout_hline();
794
795 if (argc == 1)
796 {
797 }
798 else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
799 {
800 option_dups(argv[2]);
801 }
802 else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
803 {
804 option_filterdups(argv[2]);
805 }
806 else if ((argc == 3) && (strcmp(argv[1], "dedup_nopath") == 0))
807 {
808 //option_filterdups(argv[2]);
809 }
810 else if ((argc == 3) && (strcmp(argv[1], "dryrun_nopath") == 0))
811 {
812 //option_filterdups(argv[2]);
813 }
814 else if ((argc == 4) && (strcmp(argv[1], "dedup_preserve_inside") == 0))
815 {
816 option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
817 }
818 else if ((argc == 4) && (strcmp(argv[1], "dryrun_preserve_inside") == 0))
819 {
820 option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME/10.0);
821 }
822 else
823 {
824 printf("Unrecognized parameter form. Try \"dedup\".\n");
825 }
826
827 //w_sleep(-3 /* UNLINKPAUSETIME*/ );
828
829 //stdout_hline();
830 printf("Execution ends.\n");
831 stdout_hline();
832
833 return 0;
834 }
835 //----------------------------------------------------------------------------------------------------
836

Properties

Name Value
svn:eol-style native

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25