/[dtapublic]/projs/dtats/trunk/projs/20161007_dedup/qdedup.c
ViewVC logotype

Contents of /projs/dtats/trunk/projs/20161007_dedup/qdedup.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 88 - (show annotations) (download)
Fri Nov 11 03:13:11 2016 UTC (8 years, 1 month ago) by dashley
Original Path: projs/trunk/projs/20161007_dedup/qdedup.c
File MIME type: text/plain
File size: 28465 byte(s)
Keyword expansion enabled, keyword added.
1 //----------------------------------------------------------------------------------------------------
2 //$Header$
3 //----------------------------------------------------------------------------------------------------
4 //qdedup.c
5 //----------------------------------------------------------------------------------------------------
6 //Quick and dirty program to eliminate duplicates from a file tree. A file containing the SHA512
7 //hashes of all the files to be considered must already exist, and must be regenerated each time the
8 //underlying files are deleted/added/modified, which means the file must be regenerated after each run
9 //of qdedup. (WARNING: IF YOU DO NOT REGENERATE THE FILE AFTER EACH RUN OF qdedup, YOU WILL
10 //PROBABLY DESTROY DATA. THE MECHANISM WOULD BE THAT THE SHA512 MANIFEST IMPLIES THAT DUPLICATES
11 //EXIST WHEN THEY NO LONGER DO, SO qdedup WILL ERRONEOUSLY DELETE THE LAST COPIES OF FILES.) The
12 //program will eliminate duplicates within a single specified directory or outside a single specified
13 //directory.
14 //
15 //This program will compile and run only on *nix systems and under Cygwin on Windows systems.
16 //----------------------------------------------------------------------------------------------------
17 //Copyright David T. Ashley (dashley@gmail.com), 2016.
18 //----------------------------------------------------------------------------------------------------
19 //Provided under the MIT LICENSE, reproduced immediately below.
20 //----------------------------------------------------------------------------------------------------
21 //Permission is hereby granted, free of charge, to any person obtaining a copy of
22 //this software and associated documentation files (the "Software"), to deal in the
23 //Software without restriction, including without limitation the rights to use,
24 //copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
25 //Software, and to permit persons to whom the Software is furnished to do so,
26 //subject to the following conditions:
27 //
28 //The above copyright notice and this permission notice shall be included in all
29 //copies or substantial portions of the Software.
30 //
31 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 //SOFTWARE.
38 //----------------------------------------------------------------------------------------------------
39 //All paths in the SHA512 file must be absolute or must be relative to the current working directory
40 //at the time this program is run.
41 //
42 //The recommended method to generate the SHA512 file is using the "-exec" option of the "find"
43 //command, i.e.
44 //
45 // find target_directory -type f -exec sha512sum {} \; >sha512sums.txt
46 //
47 //If any files are deleted by the program, a new SHA512 file must be generated before the program is
48 //run again to delete files. The reason for this restriction is that the program will never knowingly
49 //delete the last copy of a file. If the SHA512 file contains the digests of files that no longer
50 //exist, the program may unknowingly delete the last copies of files (because it believes based on
51 //the SHA512 file that other copies exist when in fact they do not).
52 //
53 //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
54 //
55 //This program is designed to compile and run under Cygwin or *nix only.
56 //
57 //Usage:
58 // qdedup
59 // Prints help information and exits.
60 // qdedup ndups <sha512file>
61 // Prints statistics about the number of duplicates in <sha512file>.
62 // qdedup filterdups <sha512file>
63 // Analyzes duplicates and prints the filenames of groups of duplicates. The output is designed
64 // for hand analysis so that insight can be gained into what duplicates exist and where they
65 // are located.
66 // qdedup dedup_preserve_inside <sha512file> <path>
67 // For each group of duplicates that exists, preserves the duplicates that exist within path
68 // and removes all others. If no copies of the duplicate exist within path, no copies of the
69 // duplicate will be removed.
70 // qdedup dryrun_preserve_inside <sha512file> <path>
71 // Exactly like "dedup_preserve_inside", except that no files will be deleted. Text will be
72 // output to explain what would be deleted by "dedup_preserve_inside".
73 // qdedup dedup_nopath <sha512file>
74 // For each group of duplicates that exists, preserves only the first (the one with lowest
75 // sort-order filename).
76 // qdedup dryrun_nopath <sha512file>
77 // Exactly like "dedup_nopath", except that no files will be deleted. Text will be
78 // output to explain what would be deleted by "dedup_nopath".
79 // qdedup dedup_preserve_outside <sha512file> <path>
80 // For each group of duplicates that exists, deletes duplicates only from within the specified
81 // path. If any duplicates do not have at least one copy within <path> no instances of the
82 // duplicate are deleted.
83 // qdedup dryrun_preserve_outside <sha512file> <path>
84 // Exactly like "dedup_preserve_outside", except that no files will be deleted. Text will be
85 // output to explain what would be deleted by "dedup_preserve_outside".
86 //----------------------------------------------------------------------------------------------------
87 #include <math.h>
88 #include <stdio.h>
89 #include <stdlib.h>
90 #include <string.h>
91 #include <time.h>
92 #include <unistd.h>
93 //----------------------------------------------------------------------------------------------------
//Number of printable characters in an output line.
#define LINELEN (78)

//The maximum number of characters that may be in a line of the SHA512 input file.  This count
//includes the \0 terminator, so only this value minus 1 characters may be in a line.
#define MAXLINELEN (2000)

//Number of seconds to pause between file unlinks (deletions).  This is designed to give the user
//time to abort the program if desired before catastrophic quantities of files are deleted.
#define UNLINKPAUSETIME (0.1)
101 //----------------------------------------------------------------------------------------------------
//One record of the SHA512 file:  the character representation of an SHA512 hash, plus the
//filename specified alongside it.
typedef struct
{
    //512 bits / 4 bits per hexadecimal character = 128 characters for the hash, plus 1 character
    //for the zero terminator.
    char hash[129];

    //Filename as specified in the file, allocated via the malloc() family.
    char *fname;
} tFileHashRecord;
111 //----------------------------------------------------------------------------------------------------
112 //----------------------------------------------------------------------------------------------------
113 //----- CHARACTER CLASSIFICATION FUNCTIONS ---------------------------------------------------------
114 //----------------------------------------------------------------------------------------------------
115 //----------------------------------------------------------------------------------------------------
//Reports whether a character may appear in the textual form of a hash:  a decimal digit or one of
//the lower-case letters 'a' through 'f'.  Upper-case letters are not accepted.
//
//Returns 1 if the character is acceptable, 0 otherwise.
int is_valid_hash_char(char c)
{
    static const char accepted[] = "0123456789abcdef";
    const char *p;

    //Explicit scan (rather than strchr()) so that the \0 terminator never counts as a match.
    for (p = accepted; *p != '\0'; p++)
    {
        if (*p == c)
        {
            return 1;
        }
    }

    return 0;
}
144 //----------------------------------------------------------------------------------------------------
//Reports whether a character is part of a newline sequence, i.e. has the value 13 (carriage
//return) or 10 (line feed).
//
//Returns 1 if so, 0 otherwise.
int is_newline_sequence_char(char c)
{
    if ((c == 13) || (c == 10))
    {
        return 1;
    }

    return 0;
}
159
160 //----------------------------------------------------------------------------------------------------
161 //----------------------------------------------------------------------------------------------------
162 //----- FORMATTED OUTPUT FUNCTIONS -----------------------------------------------------------------
163 //----------------------------------------------------------------------------------------------------
164 //----------------------------------------------------------------------------------------------------
//Writes character c to stream s, n times in a row.  Nothing is written when n is 0.
//
void stream_rep_char(FILE *s, char c, unsigned n)
{
    unsigned remaining;

    for (remaining = n; remaining > 0; remaining--)
    {
        fputc(c, s);
    }
}
174 //----------------------------------------------------------------------------------------------------
175 //Prints a horizontal line to a stream, including the newline.
176 //
177 void stream_hline(FILE *s)
178 {
179 stream_rep_char(s, '-', LINELEN);
180 fprintf(s, "\n");
181 }
182 //----------------------------------------------------------------------------------------------------
//Writes a horizontal rule, including the newline, to stdout.
//
void stdout_hline(void)
{
    //Same output as stream_hline():  LINELEN '-' characters and a newline.
    stream_hline(stdout);
}
190 //----------------------------------------------------------------------------------------------------
191 //----------------------------------------------------------------------------------------------------
192 //----- FATAL ERROR FUNCTIONS ----------------------------------------------------------------------
193 //----------------------------------------------------------------------------------------------------
194 //----------------------------------------------------------------------------------------------------
//Reports a fatal error on stdout, framed by horizontal rules, and terminates the program with
//exit status 1.  This function does not return.
//
//desc : Human-readable description of the error.
//file : Source file reporting the error (callers pass __FILE__).
//line : Source line reporting the error (callers pass __LINE__).
//
void fatal(const char *desc, const char *file, unsigned line)
{
    stdout_hline();
    printf("Fatal error: %s\n"
           "Source file: %s\n"
           "Line : %u\n",
           desc, file, line);
    stdout_hline();

    exit(1);
}
206 //----------------------------------------------------------------------------------------------------
207 //----------------------------------------------------------------------------------------------------
208 //----- MEMORY ALLOCATION WRAPPERS -----------------------------------------------------------------
209 //----------------------------------------------------------------------------------------------------
210 //----------------------------------------------------------------------------------------------------
//malloc() wrapper.
//
//Allocates nbytes bytes, zero-filled, and returns a pointer to them.  The function never
//returns NULL:  a request for 0 bytes or an allocation failure is reported through fatal(),
//which terminates the program.
void *w_malloc(size_t nbytes)
{
    void *rv;

    if (!nbytes)
    {
        fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
    }

    rv = malloc(nbytes);

    if (!rv)
    {
        fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
    }

    //Zero out, just for consistency.
    memset(rv, 0, nbytes);

    //Hand the block to the caller.  Without this return the caller would use an indeterminate
    //value (undefined behavior).
    return rv;
}
231 //----------------------------------------------------------------------------------------------------
//realloc() wrapper.
//
//Resizes the block at p to n bytes and returns the (possibly moved) block.  The function never
//returns NULL:  a request for 0 bytes, a NULL p, or an allocation failure is reported through
//fatal(), which terminates the program.  Bytes beyond the old size are not initialized.
void *w_realloc(void *p, size_t n)
{
    void *rv;

    if (!n)
    {
        fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
    }

    if (!p)
    {
        fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
    }

    rv = realloc(p, n);

    if (!rv)
    {
        fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
    }

    //Hand the resized block to the caller.  Without this return the caller would use an
    //indeterminate value (undefined behavior), and p may no longer be valid.
    return rv;
}
254 //----------------------------------------------------------------------------------------------------
255 //----------------------------------------------------------------------------------------------------
256 //----- SLEEP FUNCTIONS ----------------------------------------------------------------------------
257 //----------------------------------------------------------------------------------------------------
258 //----------------------------------------------------------------------------------------------------
//Sleeps for the specified time in seconds (fractional values allowed) by way of nanosleep().
//A negative time, or a time of more than 3600 seconds, is reported through fatal().  The return
//value of nanosleep() is not examined.
void w_sleep(double seconds)
{
    struct timespec t;
    double whole_seconds;

    if (seconds < 0)
    {
        fatal("Sleep for negative time request.", __FILE__, __LINE__);
    }
    else if (seconds > 3600)
    {
        fatal("Sleep for too long request.", __FILE__, __LINE__);
    }

    //Split into whole seconds and the remaining fraction, expressed in nanoseconds.
    whole_seconds = floor(seconds);
    t.tv_sec = whole_seconds;
    t.tv_nsec = (seconds - whole_seconds) * 1E9;

    nanosleep(&t, NULL);
}
278 //----------------------------------------------------------------------------------------------------
279 //----------------------------------------------------------------------------------------------------
280 //----- SHA512 FIELD READ FUNCTIONS ----------------------------------------------------------------
281 //----------------------------------------------------------------------------------------------------
282 //----------------------------------------------------------------------------------------------------
283 //These functions read in an individual field of a standard SHA512 file generated using application
284 //of the standard sha512sum program.
285 //
286 //*rcode = 1, success.
287 // 0, legal end of file, record assigned.
288 void get_sha512file_line(FILE *s, int *rcode, tFileHashRecord *hash_rec)
289 {
290 unsigned bidx;
291 unsigned nchars;
292 int ic;
293 int exitflag;
294 int eoffound;
295 int eolfound;
296 char c;
297 char buf[MAXLINELEN];
298
299 //Zero out the buffer. This handles string termination automatically.
300 memset(buf, 0, sizeof(buf));
301
302 //Read characters into the buffer until either hit EOF, newline, or can't
303 //fill the buffer any longer.
304 eoffound = 0;
305 eolfound = 0;
306 exitflag = 0;
307 bidx = 0;
308 do
309 {
310 ic = fgetc(s);
311 c = ic;
312
313 if (ic == EOF)
314 {
315 eoffound = 1;
316 eolfound = 0;
317 nchars = bidx;
318 exitflag = 1;
319 }
320 else if (is_newline_sequence_char(c))
321 {
322 eoffound = 0;
323 eolfound = 1;
324 nchars = bidx;
325 exitflag = 1;
326 }
327 else if (bidx >= (MAXLINELEN - 1))
328 {
329 fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
330 }
331 else
332 {
333 buf[bidx] = c;
334 bidx++;
335 exitflag = 0;
336 }
337 } while(! exitflag);
338
339 //If we encountered a newline, inch past it. We may encounter an EOF.
340 if (eolfound)
341 {
342 exitflag = 0;
343 do
344 {
345 ic = fgetc(s);
346 c = ic;
347
348 if (ic == EOF)
349 {
350 eoffound = 1;
351 eolfound = 0;
352 exitflag = 1;
353 }
354 else if (is_newline_sequence_char(c))
355 {
356 exitflag = 0;
357 }
358 else
359 {
360 //We hit the next line. Put the character back.
361 eoffound = 0;
362 eolfound = 1;
363 ungetc(ic, s);
364 exitflag = 1;
365 }
366 } while(! exitflag);
367 }
368
369 //For better or worse, we have a \0-terminated line in the buffer.
370 //
371 //Zero the caller's area. This takes care of the hash terminator as well.
372 memset(hash_rec, 0, sizeof(*hash_rec));
373
374 //Ensure that we have at least 128 characters, and they are all hex characters.
375 //Otherwise, we can't proceed.
376 if (nchars < 128)
377 {
378 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
379 }
380 else
381 {
382 for (bidx = 0; bidx < 128; bidx++)
383 {
384 if (! is_valid_hash_char(buf[bidx]))
385 {
386 fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
387 }
388 }
389 }
390
391 //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
392 if (nchars < 130)
393 {
394 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
395 }
396 else if (buf[128] != ' ')
397 {
398 fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
399 }
400 // else if (buf[129] != '*')
401 // {
402 // fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
403 // }
404 else if (buf[129] != ' ')
405 {
406 //130th character is ' '. Need to figure out why sometimes space and sometimes '*'.
407 fatal("130th hash line character must be \" \".", __FILE__, __LINE__);
408 }
409
410 //There must be a 131'st character. Beyond that, we can't qualify, because filenames may
411 //have odd characters and may be of any length.
412 if (nchars < 131)
413 {
414 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
415 }
416
417 //Copy the hash to the caller's area. The terminator has already been inserted.
418 memcpy(&(hash_rec->hash[0]), buf, 128);
419
420 //Allocate space for the filename.
421 hash_rec->fname = w_malloc(strlen(buf+130) + 1);
422
423 //Make the copy.
424 strcpy(hash_rec->fname, buf+130);
425
426 if (eoffound)
427 *rcode = 0;
428 else
429 *rcode = 1;
430 }
431 //----------------------------------------------------------------------------------------------------
432 void parseinputfile(tFileHashRecord **parsed_recs, unsigned *count, char *fname)
433 {
434 FILE *s;
435 int rcode;
436
437 //Try to open the file for reading. Inability is a failure.
438 s = fopen(fname, "r");
439 if (!s)
440 {
441 fatal("Hash file open failure.", __FILE__, __LINE__);
442 }
443
444 //Start off with a count of 0 and a NULL pointer.
445 *count = 0;
446 *parsed_recs = NULL;
447
448 do
449 {
450 //For the first time, allocate space for one record. Beyond that,
451 //expand it.
452 if (! *parsed_recs)
453 {
454 *parsed_recs = w_malloc(sizeof(tFileHashRecord));
455 }
456 else
457 {
458 *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
459 }
460
461 //Parse and fill in the space.
462 get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
463
464 //We now have one more.
465 (*count)++;
466 } while(rcode == 1);
467
468 //Try to close the file. Inability is a failure.
469 if (fclose(s))
470 {
471 fatal("Hash file close failure.", __FILE__, __LINE__);
472 }
473 }
474 //----------------------------------------------------------------------------------------------------
475 int sortcmpascendinghash(const void *p0_in, const void *p1_in)
476 {
477 const tFileHashRecord *p0, *p1;
478
479 p0 = p0_in;
480 p1 = p1_in;
481
482 return(strcmp(p0->hash, p1->hash));
483 }
484
485 //----------------------------------------------------------------------------------------------------
486 void sortinternaldsbyhash(tFileHashRecord *parsed_recs, unsigned count)
487 {
488 qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
489 }
490 //----------------------------------------------------------------------------------------------------
491 int sortcmpascendingfname(const void *p0_in, const void *p1_in)
492 {
493 const tFileHashRecord *p0, *p1;
494
495 p0 = p0_in;
496 p1 = p1_in;
497
498 return(strcmp(p0->fname, p1->fname));
499 }
500 //----------------------------------------------------------------------------------------------------
501 //This sort has to be run after the hash sort. Within groups of identical hashes, it sorts by
502 //ascending filename.
503 void sortinternalgroupfname(tFileHashRecord *parsed_recs, unsigned count)
504 {
505 unsigned ui;
506 unsigned i_group_min, i_group_max;
507
508 if (! count)
509 return;
510
511 i_group_min = 0;
512 i_group_max = 0;
513
514 do
515 {
516 //Advance i_group_max to the end of the group of duplicates.
517 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
518 {
519 i_group_max++;
520 }
521
522 if (i_group_min != i_group_max)
523 {
524 //Sort the internal group.
525 qsort(parsed_recs + i_group_min,
526 i_group_max - i_group_min + 1,
527 sizeof(tFileHashRecord),
528 sortcmpascendingfname);
529 }
530
531 //On to the next group.
532 i_group_max++;
533 i_group_min = i_group_max;
534
535 } while (i_group_max < (count - 1));
536 }
537 //----------------------------------------------------------------------------------------------------
538 void printsinglerecord(tFileHashRecord *rec, unsigned elno)
539 {
540 printf("[%9u]\n", elno);
541 printf("Hash : %s\n", rec->hash);
542 printf("Filename : %s\n", rec->fname);
543 stdout_hline();
544 }
545 //----------------------------------------------------------------------------------------------------
546 void printinternalds(tFileHashRecord *parsed_recs, unsigned count)
547 {
548 unsigned i;
549
550 for (i=0; i<count; i++)
551 {
552 printsinglerecord(parsed_recs + i, i);
553 }
554 }
555 //----------------------------------------------------------------------------------------------------
556 void gather_dup_stats(tFileHashRecord *parsed_recs, unsigned count, unsigned *out_num_dups, unsigned *out_cumulative_dups)
557 {
558 unsigned i_group_min, i_group_max;
559
560 *out_num_dups = 0;
561 *out_cumulative_dups = 0;
562
563 if (! count)
564 return;
565
566 i_group_min = 0;
567 i_group_max = 0;
568
569 do
570 {
571 //Advance i_group_max to the end of the group of duplicates.
572 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
573 {
574 i_group_max++;
575 }
576
577 //Log the findings.
578 if (i_group_min != i_group_max)
579 {
580 (*out_num_dups)++;
581 (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
582 }
583
584 //On to the next group.
585 i_group_max++;
586 i_group_min = i_group_max;
587
588 } while (i_group_max < (count - 1));
589 }
590 //----------------------------------------------------------------------------------------------------
591 void option_dups(char *fname)
592 {
593 tFileHashRecord *parsed_recs;
594 unsigned count, num_dups, cumulative_dups;
595
596 parseinputfile(&parsed_recs, &count, fname);
597 //printf("%u records parsed.\n", count);
598 sortinternaldsbyhash(parsed_recs, count);
599 sortinternalgroupfname(parsed_recs, count);
600 printinternalds(parsed_recs, count);
601 stdout_hline();
602 gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
603 printf("Number of duplicated files : %u\n", num_dups);
604 if (num_dups)
605 {
606 printf("Average number of duplicates: %.2f\n", (double)cumulative_dups/(double)num_dups);
607 }
608 }
609 //----------------------------------------------------------------------------------------------------
610 void option_filterdups(char *fname)
611 {
612 tFileHashRecord *parsed_recs;
613 unsigned dupgroup;
614 unsigned count;
615 unsigned ui;
616 unsigned i_group_min, i_group_max;
617
618 parseinputfile(&parsed_recs, &count, fname);
619 //printf("%u records parsed.\n", count);
620 sortinternaldsbyhash(parsed_recs, count);
621 sortinternalgroupfname(parsed_recs, count);
622
623 if (! count)
624 return;
625
626 dupgroup = 0;
627 i_group_min = 0;
628 i_group_max = 0;
629
630 do
631 {
632 //Advance i_group_max to the end of the group of duplicates.
633 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
634 {
635 i_group_max++;
636 }
637
638 //Print the findings.
639 if (i_group_min != i_group_max)
640 {
641 printf("Duplicate group %u:\n", dupgroup);
642 for (ui = i_group_min; ui <= i_group_max; ui++)
643 {
644 printf("%s\n", parsed_recs[ui].fname);
645 }
646
647 dupgroup++;
648
649 stdout_hline();
650 }
651
652 //On to the next group.
653 i_group_max++;
654 i_group_min = i_group_max;
655
656 } while (i_group_max < (count - 1));
657 }
658 //----------------------------------------------------------------------------------------------------
//Returns 1 if the filename is within the specified path, or 0 otherwise.
//
//Membership is decided by a byte-wise prefix comparison:  fname is within path when fname is
//longer than path and begins with exactly the characters of path.  No normalization of either
//string is performed.
//
//A zero-length fname, a zero-length path, or a path that does not end with a forward slash is
//reported through fatal().
int is_path_member(const char *fname, const char *path)
{
    size_t fname_len;
    size_t path_len;

    fname_len = strlen(fname);
    if (fname_len == 0)
    {
        fatal("Zero-length filename.", __FILE__, __LINE__);
    }

    path_len = strlen(path);
    if (path_len == 0)
    {
        fatal("Zero-length path.", __FILE__, __LINE__);
    }

    if (path[path_len - 1] != '/')
    {
        fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
    }

    //Can't be in the path if the filename is not longer than the path name.
    if (fname_len <= path_len)
    {
        return 0;
    }

    return (memcmp(fname, path, path_len) == 0) ? 1 : 0;
}
688 //----------------------------------------------------------------------------------------------------
689 void option_dedup(char *fname, char *path, int may_delete, double pause_time)
690 {
691 tFileHashRecord *parsed_recs;
692 unsigned dupgroup;
693 unsigned count;
694 unsigned ui;
695 unsigned within_path;
696 unsigned i_group_min, i_group_max;
697
698 parseinputfile(&parsed_recs, &count, fname);
699 //printf("%u records parsed.\n", count);
700 sortinternaldsbyhash(parsed_recs, count);
701 sortinternalgroupfname(parsed_recs, count);
702
703 if (! count)
704 return;
705
706 dupgroup = 0;
707 i_group_min = 0;
708 i_group_max = 0;
709
710 do
711 {
712 //Advance i_group_max to the end of the group of duplicates.
713 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
714 {
715 i_group_max++;
716 }
717
718 //If this is a group of duplicates.
719 if (i_group_min != i_group_max)
720 {
721 //Print the findings.
722 printf("Duplicate group %u:\n", dupgroup);
723 for (ui = i_group_min; ui <= i_group_max; ui++)
724 {
725 printf("%s\n", parsed_recs[ui].fname);
726 }
727
728 dupgroup++;
729
730 stdout_hline();
731
732 //Count how many of the group of duplicates are within the supplied path.
733 within_path = 0;
734 for (ui = i_group_min; ui <= i_group_max; ui++)
735 {
736 if (is_path_member(parsed_recs[ui].fname, path))
737 {
738 within_path++;
739 }
740 }
741
742 //We have to take different actions based on whether we do or don't have any within path.
743 //If we don't have any, we may delete nothing.
744 if (! within_path)
745 {
746 printf("None of these duplicates in path--taking no action.\n");
747 //stdout_hline();
748 }
749 else
750 {
751 for (ui = i_group_min; ui <= i_group_max; ui++)
752 {
753 if (is_path_member(parsed_recs[ui].fname, path))
754 {
755 printf("Not deleting: %s\n", parsed_recs[ui].fname);
756 }
757 else
758 {
759 printf("Deleting : %s\n", parsed_recs[ui].fname);
760 if (may_delete)
761 {
762 if (! unlink(parsed_recs[ui].fname))
763 {
764 printf(" File deleted (unlinked) successfully.\n");
765 }
766 else
767 {
768 printf(" Failure attempting to delete (unlink) file.\n");
769 }
770 }
771 else
772 {
773 printf(" Dry run only.\n");
774 }
775 }
776
777 //w_sleep(pause_time);
778 }
779 }
780
781 stdout_hline();
782 }
783
784 //On to the next group.
785 i_group_max++;
786 i_group_min = i_group_max;
787
788 } while (i_group_max < (count - 1));
789 }
790 //----------------------------------------------------------------------------------------------------
791 int main(int argc, char* argv[])
792 {
793 stdout_hline();
794 printf("Execution begins.\n");
795 stdout_hline();
796
797 if (argc == 1)
798 {
799 }
800 else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
801 {
802 option_dups(argv[2]);
803 }
804 else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
805 {
806 option_filterdups(argv[2]);
807 }
808 else if ((argc == 3) && (strcmp(argv[1], "dedup_nopath") == 0))
809 {
810 //option_filterdups(argv[2]);
811 }
812 else if ((argc == 3) && (strcmp(argv[1], "dryrun_nopath") == 0))
813 {
814 //option_filterdups(argv[2]);
815 }
816 else if ((argc == 4) && (strcmp(argv[1], "dedup_preserve_inside") == 0))
817 {
818 option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
819 }
820 else if ((argc == 4) && (strcmp(argv[1], "dryrun_preserve_inside") == 0))
821 {
822 option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME/10.0);
823 }
824 else
825 {
826 printf("Unrecognized parameter form. Try \"dedup\".\n");
827 }
828
829 //w_sleep(-3 /* UNLINKPAUSETIME*/ );
830
831 //stdout_hline();
832 printf("Execution ends.\n");
833 stdout_hline();
834
835 return 0;
836 }
837 //----------------------------------------------------------------------------------------------------
838

Properties

Name Value
svn:eol-style native
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25