/[dtapublic]/projs/trunk/projs/20161007_dedup/qdedup.c
ViewVC logotype

Contents of /projs/trunk/projs/20161007_dedup/qdedup.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 74 - (show annotations) (download)
Sat Nov 5 16:51:05 2016 UTC (7 years, 11 months ago) by dashley
File MIME type: text/plain
File size: 28099 byte(s)
Documentation cleanup.
1 //----------------------------------------------------------------------------------------------------
2 //qdedup.c
3 //----------------------------------------------------------------------------------------------------
4 //Quick and dirty program to eliminate duplicates from a file tree. A file containing the SHA512
5 //hashes of all the files to be considered must already exist, and must be regenerated each time the
6 //underlying files are deleted/added/modified, which means the file must be regenerated after each run
7 //of qdedup. (WARNING: IF YOU DO NOT REGENERATE THE FILE AFTER EACH RUN OF qdedup, YOU WILL
8 //PROBABLY DESTROY DATA. THE MECHANISM WOULD BE THAT THE SHA512 MANIFEST IMPLIES THAT DUPLICATES
9 //EXIST WHEN THEY NO LONGER DO, SO qdedup WILL ERRONEOUSLY DELETE THE LAST COPIES OF FILES.) The
10 //program will eliminate duplicates within a single specified directory or outside a single specified
11 //directory.
12 //
13 //This program will compile and run only on *nix systems and under Cygwin on Windows systems.
14 //----------------------------------------------------------------------------------------------------
15 //Copyright David T. Ashley (dashley@gmail.com), 2016.
16 //----------------------------------------------------------------------------------------------------
17 //Provided under the MIT LICENSE, reproduced immediately below.
18 //----------------------------------------------------------------------------------------------------
19 //Permission is hereby granted, free of charge, to any person obtaining a copy of
20 //this software and associated documentation files (the "Software"), to deal in the
21 //Software without restriction, including without limitation the rights to use,
22 //copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
23 //Software, and to permit persons to whom the Software is furnished to do so,
24 //subject to the following conditions:
25 //
26 //The above copyright notice and this permission notice shall be included in all
27 //copies or substantial portions of the Software.
28 //
29 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 //SOFTWARE.
36 //----------------------------------------------------------------------------------------------------
37 //All paths in the SHA512 file must be absolute or must be relative to the current working directory
38 //at the time this program is run.
39 //
40 //The recommended method to generate the SHA512 file is using the "-exec" option of the "find"
41 //command, i.e.
42 //
43 // find target_directory -type f -exec sha512sum {} \; >sha512sums.txt
44 //
45 //If any files are deleted by the program, a new SHA512 file must be generated before the program is
46 //run again to delete files. The reason for this restriction is that the program will never knowingly
47 //delete the last copy of a file. If the SHA512 file contains the digests of files that no longer
48 //exist, the program may unknowingly delete the last copies of files (because it believes based on
49 //the SHA512 file that other copies exist when in fact they do not).
50 //
51 //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
52 //
53 //This program is designed to compile and run under Cygwin or *nix only.
54 //
55 //Usage:
56 // qdedup
57 // Prints help information and exits.
58 // qdedup ndups <sha512file>
59 // Prints statistics about the number of duplicates in <sha512file>.
60 // qdedup filterdups <sha512file>
61 // Analyzes duplicates and prints the filenames of groups of duplicates. The output is designed
62 // for hand analysis so that insight can be gained into what duplicates exist and where they
63 // are located.
64 // qdedup dedup_preserve_inside <sha512file> <path>
65 // For each group of duplicates that exists, preserves the duplicates that exist within path
66 // and removes all others. If no copies of the duplicate exist within path, no copies of the
67 // duplicate will be removed.
68 // qdedup dryrun_preserve_inside <sha512file> <path>
69 // Exactly like "dedup_preserve_inside", except that no files will be deleted. Text will be
70 // output to explain what would be deleted by "dedup_preserve_inside".
71 // qdedup dedup_nopath <sha512file>
72 // For each group of duplicates that exists, preserves only the first (the one with the lowest
73 // sort-order filename).
74 // qdedup dryrun_nopath <sha512file>
75 // Exactly like "dedup_nopath", except that no files will be deleted. Text will be
76 // output to explain what would be deleted by "dedup_nopath".
77 // qdedup dedup_preserve_outside <sha512file> <path>
78 // For each group of duplicates that exists, deletes duplicates only from within the specified
79 // path. If any duplicates do not have at least one copy within <path> no instances of the
80 // duplicate are deleted.
81 // qdedup dryrun_preserve_outside <sha512file> <path>
82 // Exactly like "dedup_preserve_outside", except that no files will be deleted. Text will be
83 // output to explain what would be deleted by "dedup_preserve_outside".
84 //----------------------------------------------------------------------------------------------------
85 #include <math.h>
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <string.h>
89 #include <time.h>
90 #include <unistd.h>
91 //----------------------------------------------------------------------------------------------------
92 #define LINELEN (78) //Number of printable characters in a line.
93 #define MAXLINELEN (2000) //The maximum number of characters that may be in a line of the
94 //SHA512 input file. This count includes the \0 terminator, so only
95 //this value minus 1 characters may be in a line.
96 #define UNLINKPAUSETIME (0.1) //Number of seconds to pause between file unlinks (deletions). This
97 //is designed to give the user time to abort the program if desired
98 //before catastrophic quantities of files are deleted.
99 //----------------------------------------------------------------------------------------------------
100 //Data structure that holds the character representation of and SHA512 hash, plus the specified
101 //filename.
102 typedef struct
103 {
104 char hash[129];
105 //512/4 = 128 characters for the hash, plus 1 character for zero terminator.
106 char *fname;
107 //Filename as specified in the file, allocated via malloc() family.
108 } tFileHashRecord;
109 //----------------------------------------------------------------------------------------------------
110 //----------------------------------------------------------------------------------------------------
111 //----- CHARACTER CLASSIFICATION FUNCTIONS ---------------------------------------------------------
112 //----------------------------------------------------------------------------------------------------
113 //----------------------------------------------------------------------------------------------------
114 //TRUE if character is part of valid hash.
115 int is_valid_hash_char(char c)
116 {
117 switch(c)
118 {
119 case '0':
120 case '1':
121 case '2':
122 case '3':
123 case '4':
124 case '5':
125 case '6':
126 case '7':
127 case '8':
128 case '9':
129 case 'a':
130 case 'b':
131 case 'c':
132 case 'd':
133 case 'e':
134 case 'f':
135 return(1);
136 break;
137 default:
138 return(0);
139 break;
140 }
141 }
142 //----------------------------------------------------------------------------------------------------
143 //TRUE if character is part of newline sequence
144 int is_newline_sequence_char(char c)
145 {
146 switch(c)
147 {
148 case 13:
149 case 10:
150 return(1);
151 break;
152 default:
153 return(0);
154 break;
155 }
156 }
157
158 //----------------------------------------------------------------------------------------------------
159 //----------------------------------------------------------------------------------------------------
160 //----- FORMATTED OUTPUT FUNCTIONS -----------------------------------------------------------------
161 //----------------------------------------------------------------------------------------------------
162 //----------------------------------------------------------------------------------------------------
163 //Repeats a character to a stream a specified number of times.
164 //
165 void stream_rep_char(FILE *s, char c, unsigned n)
166 {
167 while(n--)
168 {
169 fprintf(s, "%c", c);
170 }
171 }
172 //----------------------------------------------------------------------------------------------------
173 //Prints a horizontal line to a stream, including the newline.
174 //
175 void stream_hline(FILE *s)
176 {
177 stream_rep_char(s, '-', LINELEN);
178 fprintf(s, "\n");
179 }
180 //----------------------------------------------------------------------------------------------------
181 //Prints a horizontal line to a stdout, including the newline.
182 //
183 void stdout_hline(void)
184 {
185 stream_rep_char(stdout, '-', LINELEN);
186 fprintf(stdout, "\n");
187 }
188 //----------------------------------------------------------------------------------------------------
189 //----------------------------------------------------------------------------------------------------
190 //----- FATAL ERROR FUNCTIONS ----------------------------------------------------------------------
191 //----------------------------------------------------------------------------------------------------
192 //----------------------------------------------------------------------------------------------------
193 //Errors out fatally.
194 //
195 void fatal(const char *desc, const char *file, unsigned line)
196 {
197 stdout_hline();
198 printf("Fatal error: %s\n", desc);
199 printf("Source file: %s\n", file);
200 printf("Line : %u\n", line);
201 stdout_hline();
202 exit(1);
203 }
204 //----------------------------------------------------------------------------------------------------
205 //----------------------------------------------------------------------------------------------------
206 //----- MEMORY ALLOCATION WRAPPERS -----------------------------------------------------------------
207 //----------------------------------------------------------------------------------------------------
208 //----------------------------------------------------------------------------------------------------
209 //malloc() wrapper.
210 void *w_malloc(size_t nbytes)
211 {
212 void *rv;
213
214 if (!nbytes)
215 {
216 fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
217 }
218
219 rv = malloc(nbytes);
220
221 if (!rv)
222 {
223 fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
224 }
225
226 //Zero out, just for consistency.
227 memset(rv, 0, nbytes);
228 }
229 //----------------------------------------------------------------------------------------------------
230 //realloc() wrapper.
231 void *w_realloc(void *p, size_t n)
232 {
233 void *rv;
234
235 if (!n)
236 {
237 fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
238 }
239
240 if (!p)
241 {
242 fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
243 }
244
245 rv = realloc(p, n);
246
247 if (!rv)
248 {
249 fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
250 }
251 }
252 //----------------------------------------------------------------------------------------------------
253 //----------------------------------------------------------------------------------------------------
254 //----- SLEEP FUNCTIONS ----------------------------------------------------------------------------
255 //----------------------------------------------------------------------------------------------------
256 //----------------------------------------------------------------------------------------------------
257 //Sleep for a time, in seconds.
258 void w_sleep(double seconds)
259 {
260 struct timespec t;
261
262 if (seconds < 0)
263 {
264 fatal("Sleep for negative time request.", __FILE__, __LINE__);
265 }
266 else if (seconds > 3600)
267 {
268 fatal("Sleep for too long request.", __FILE__, __LINE__);
269 }
270
271 t.tv_sec = floor(seconds);
272 t.tv_nsec = (seconds - floor(seconds)) * 1E9;
273
274 nanosleep(&t, NULL);
275 }
276 //----------------------------------------------------------------------------------------------------
277 //----------------------------------------------------------------------------------------------------
278 //----- SHA512 FIELD READ FUNCTIONS ----------------------------------------------------------------
279 //----------------------------------------------------------------------------------------------------
280 //----------------------------------------------------------------------------------------------------
281 //These functions read in an individual field of a standard SHA512 file generated using application
282 //of the standard sha512sum program.
283 //
284 //*rcode = 1, success.
285 // 0, legal end of file, record assigned.
286 void get_sha512file_line(FILE *s, int *rcode, tFileHashRecord *hash_rec)
287 {
288 unsigned bidx;
289 unsigned nchars;
290 int ic;
291 int exitflag;
292 int eoffound;
293 int eolfound;
294 char c;
295 char buf[MAXLINELEN];
296
297 //Zero out the buffer. This handles string termination automatically.
298 memset(buf, 0, sizeof(buf));
299
300 //Read characters into the buffer until either hit EOF, newline, or can't
301 //fill the buffer any longer.
302 eoffound = 0;
303 eolfound = 0;
304 exitflag = 0;
305 bidx = 0;
306 do
307 {
308 ic = fgetc(s);
309 c = ic;
310
311 if (ic == EOF)
312 {
313 eoffound = 1;
314 eolfound = 0;
315 nchars = bidx;
316 exitflag = 1;
317 }
318 else if (is_newline_sequence_char(c))
319 {
320 eoffound = 0;
321 eolfound = 1;
322 nchars = bidx;
323 exitflag = 1;
324 }
325 else if (bidx >= (MAXLINELEN - 1))
326 {
327 fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
328 }
329 else
330 {
331 buf[bidx] = c;
332 bidx++;
333 exitflag = 0;
334 }
335 } while(! exitflag);
336
337 //If we encountered a newline, inch past it. We may encounter an EOF.
338 if (eolfound)
339 {
340 exitflag = 0;
341 do
342 {
343 ic = fgetc(s);
344 c = ic;
345
346 if (ic == EOF)
347 {
348 eoffound = 1;
349 eolfound = 0;
350 exitflag = 1;
351 }
352 else if (is_newline_sequence_char(c))
353 {
354 exitflag = 0;
355 }
356 else
357 {
358 //We hit the next line. Put the character back.
359 eoffound = 0;
360 eolfound = 1;
361 ungetc(ic, s);
362 exitflag = 1;
363 }
364 } while(! exitflag);
365 }
366
367 //For better or worse, we have a \0-terminated line in the buffer.
368 //
369 //Zero the caller's area. This takes care of the hash terminator as well.
370 memset(hash_rec, 0, sizeof(*hash_rec));
371
372 //Ensure that we have at least 128 characters, and they are all hex characters.
373 //Otherwise, we can't proceed.
374 if (nchars < 128)
375 {
376 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
377 }
378 else
379 {
380 for (bidx = 0; bidx < 128; bidx++)
381 {
382 if (! is_valid_hash_char(buf[bidx]))
383 {
384 fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
385 }
386 }
387 }
388
389 //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
390 if (nchars < 130)
391 {
392 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
393 }
394 else if (buf[128] != ' ')
395 {
396 fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
397 }
398 else if (buf[129] != '*')
399 {
400 fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
401 }
402
403 //There must be a 131'st character. Beyond that, we can't qualify, because filenames may
404 //have odd characters and may be of any length.
405 if (nchars < 131)
406 {
407 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
408 }
409
410 //Copy the hash to the caller's area. The terminator has already been inserted.
411 memcpy(&(hash_rec->hash[0]), buf, 128);
412
413 //Allocate space for the filename.
414 hash_rec->fname = w_malloc(strlen(buf+130) + 1);
415
416 //Make the copy.
417 strcpy(hash_rec->fname, buf+130);
418
419 if (eoffound)
420 *rcode = 0;
421 else
422 *rcode = 1;
423 }
424 //----------------------------------------------------------------------------------------------------
425 void parseinputfile(tFileHashRecord **parsed_recs, unsigned *count, char *fname)
426 {
427 FILE *s;
428 int rcode;
429
430 //Try to open the file for reading. Inability is a failure.
431 s = fopen(fname, "r");
432 if (!s)
433 {
434 fatal("Hash file open failure.", __FILE__, __LINE__);
435 }
436
437 //Start off with a count of 0 and a NULL pointer.
438 *count = 0;
439 *parsed_recs = NULL;
440
441 do
442 {
443 //For the first time, allocate space for one record. Beyond that,
444 //expand it.
445 if (! *parsed_recs)
446 {
447 *parsed_recs = w_malloc(sizeof(tFileHashRecord));
448 }
449 else
450 {
451 *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
452 }
453
454 //Parse and fill in the space.
455 get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
456
457 //We now have one more.
458 (*count)++;
459 } while(rcode == 1);
460
461 //Try to close the file. Inability is a failure.
462 if (fclose(s))
463 {
464 fatal("Hash file close failure.", __FILE__, __LINE__);
465 }
466 }
467 //----------------------------------------------------------------------------------------------------
468 int sortcmpascendinghash(const void *p0_in, const void *p1_in)
469 {
470 const tFileHashRecord *p0, *p1;
471
472 p0 = p0_in;
473 p1 = p1_in;
474
475 return(strcmp(p0->hash, p1->hash));
476 }
477
478 //----------------------------------------------------------------------------------------------------
479 void sortinternaldsbyhash(tFileHashRecord *parsed_recs, unsigned count)
480 {
481 qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
482 }
483 //----------------------------------------------------------------------------------------------------
484 int sortcmpascendingfname(const void *p0_in, const void *p1_in)
485 {
486 const tFileHashRecord *p0, *p1;
487
488 p0 = p0_in;
489 p1 = p1_in;
490
491 return(strcmp(p0->fname, p1->fname));
492 }
493 //----------------------------------------------------------------------------------------------------
494 //This sort has to be run after the hash sort. Within groups of identical hashes, it sorts by
495 //ascending filename.
496 void sortinternalgroupfname(tFileHashRecord *parsed_recs, unsigned count)
497 {
498 unsigned ui;
499 unsigned i_group_min, i_group_max;
500
501 if (! count)
502 return;
503
504 i_group_min = 0;
505 i_group_max = 0;
506
507 do
508 {
509 //Advance i_group_max to the end of the group of duplicates.
510 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
511 {
512 i_group_max++;
513 }
514
515 if (i_group_min != i_group_max)
516 {
517 //Sort the internal group.
518 qsort(parsed_recs + i_group_min,
519 i_group_max - i_group_min + 1,
520 sizeof(tFileHashRecord),
521 sortcmpascendingfname);
522 }
523
524 //On to the next group.
525 i_group_max++;
526 i_group_min = i_group_max;
527
528 } while (i_group_max < (count - 1));
529 }
530 //----------------------------------------------------------------------------------------------------
531 void printsinglerecord(tFileHashRecord *rec, unsigned elno)
532 {
533 printf("[%9u]\n", elno);
534 printf("Hash : %s\n", rec->hash);
535 printf("Filename : %s\n", rec->fname);
536 stdout_hline();
537 }
538 //----------------------------------------------------------------------------------------------------
539 void printinternalds(tFileHashRecord *parsed_recs, unsigned count)
540 {
541 unsigned i;
542
543 for (i=0; i<count; i++)
544 {
545 printsinglerecord(parsed_recs + i, i);
546 }
547 }
548 //----------------------------------------------------------------------------------------------------
549 void gather_dup_stats(tFileHashRecord *parsed_recs, unsigned count, unsigned *out_num_dups, unsigned *out_cumulative_dups)
550 {
551 unsigned i_group_min, i_group_max;
552
553 *out_num_dups = 0;
554 *out_cumulative_dups = 0;
555
556 if (! count)
557 return;
558
559 i_group_min = 0;
560 i_group_max = 0;
561
562 do
563 {
564 //Advance i_group_max to the end of the group of duplicates.
565 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
566 {
567 i_group_max++;
568 }
569
570 //Log the findings.
571 if (i_group_min != i_group_max)
572 {
573 (*out_num_dups)++;
574 (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
575 }
576
577 //On to the next group.
578 i_group_max++;
579 i_group_min = i_group_max;
580
581 } while (i_group_max < (count - 1));
582 }
583 //----------------------------------------------------------------------------------------------------
584 void option_dups(char *fname)
585 {
586 tFileHashRecord *parsed_recs;
587 unsigned count, num_dups, cumulative_dups;
588
589 parseinputfile(&parsed_recs, &count, fname);
590 //printf("%u records parsed.\n", count);
591 sortinternaldsbyhash(parsed_recs, count);
592 sortinternalgroupfname(parsed_recs, count);
593 printinternalds(parsed_recs, count);
594 stdout_hline();
595 gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
596 printf("Number of duplicated files : %u\n", num_dups);
597 if (num_dups)
598 {
599 printf("Average number of duplicates: %.2f\n", (double)cumulative_dups/(double)num_dups);
600 }
601 }
602 //----------------------------------------------------------------------------------------------------
603 void option_filterdups(char *fname)
604 {
605 tFileHashRecord *parsed_recs;
606 unsigned dupgroup;
607 unsigned count;
608 unsigned ui;
609 unsigned i_group_min, i_group_max;
610
611 parseinputfile(&parsed_recs, &count, fname);
612 //printf("%u records parsed.\n", count);
613 sortinternaldsbyhash(parsed_recs, count);
614 sortinternalgroupfname(parsed_recs, count);
615
616 if (! count)
617 return;
618
619 dupgroup = 0;
620 i_group_min = 0;
621 i_group_max = 0;
622
623 do
624 {
625 //Advance i_group_max to the end of the group of duplicates.
626 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
627 {
628 i_group_max++;
629 }
630
631 //Print the findings.
632 if (i_group_min != i_group_max)
633 {
634 printf("Duplicate group %u:\n", dupgroup);
635 for (ui = i_group_min; ui <= i_group_max; ui++)
636 {
637 printf("%s\n", parsed_recs[ui].fname);
638 }
639
640 dupgroup++;
641
642 stdout_hline();
643 }
644
645 //On to the next group.
646 i_group_max++;
647 i_group_min = i_group_max;
648
649 } while (i_group_max < (count - 1));
650 }
651 //----------------------------------------------------------------------------------------------------
652 //Returns true if the filename is within the specified path, or false otherwise.
653 int is_path_member(const char *fname, const char *path)
654 {
655 if (strlen(fname) == 0)
656 {
657 fatal("Zero-length filename.", __FILE__, __LINE__);
658 }
659 else if (strlen(path) == 0)
660 {
661 fatal("Zero-length path.", __FILE__, __LINE__);
662 }
663 else if (path[strlen(path) - 1] != '/')
664 {
665 fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
666 }
667 else if (strlen(fname) <= strlen(path))
668 {
669 //Can't be in the path because filename is not longer than path name.
670 return 0;
671 }
672 else if (memcmp(fname, path, strlen(path)) == 0)
673 {
674 return 1;
675 }
676 else
677 {
678 return 0;
679 }
680 }
681 //----------------------------------------------------------------------------------------------------
682 void option_dedup(char *fname, char *path, int may_delete, double pause_time)
683 {
684 tFileHashRecord *parsed_recs;
685 unsigned dupgroup;
686 unsigned count;
687 unsigned ui;
688 unsigned within_path;
689 unsigned i_group_min, i_group_max;
690
691 parseinputfile(&parsed_recs, &count, fname);
692 //printf("%u records parsed.\n", count);
693 sortinternaldsbyhash(parsed_recs, count);
694 sortinternalgroupfname(parsed_recs, count);
695
696 if (! count)
697 return;
698
699 dupgroup = 0;
700 i_group_min = 0;
701 i_group_max = 0;
702
703 do
704 {
705 //Advance i_group_max to the end of the group of duplicates.
706 while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
707 {
708 i_group_max++;
709 }
710
711 //If this is a group of duplicates.
712 if (i_group_min != i_group_max)
713 {
714 //Print the findings.
715 printf("Duplicate group %u:\n", dupgroup);
716 for (ui = i_group_min; ui <= i_group_max; ui++)
717 {
718 printf("%s\n", parsed_recs[ui].fname);
719 }
720
721 dupgroup++;
722
723 stdout_hline();
724
725 //Count how many of the group of duplicates are within the supplied path.
726 within_path = 0;
727 for (ui = i_group_min; ui <= i_group_max; ui++)
728 {
729 if (is_path_member(parsed_recs[ui].fname, path))
730 {
731 within_path++;
732 }
733 }
734
735 //We have to take different actions based on whether we do or don't have any within path.
736 //If we don't have any, we may delete nothing.
737 if (! within_path)
738 {
739 printf("None of these duplicates in path--taking no action.\n");
740 //stdout_hline();
741 }
742 else
743 {
744 for (ui = i_group_min; ui <= i_group_max; ui++)
745 {
746 if (is_path_member(parsed_recs[ui].fname, path))
747 {
748 printf("Not deleting: %s\n", parsed_recs[ui].fname);
749 }
750 else
751 {
752 printf("Deleting : %s\n", parsed_recs[ui].fname);
753 if (may_delete)
754 {
755 if (! unlink(parsed_recs[ui].fname))
756 {
757 printf(" File deleted (unlinked) successfully.\n");
758 }
759 else
760 {
761 printf(" Failure attempting to delete (unlink) file.\n");
762 }
763 }
764 else
765 {
766 printf(" Dry run only.\n");
767 }
768 }
769
770 //w_sleep(pause_time);
771 }
772 }
773
774 stdout_hline();
775 }
776
777 //On to the next group.
778 i_group_max++;
779 i_group_min = i_group_max;
780
781 } while (i_group_max < (count - 1));
782 }
783 //----------------------------------------------------------------------------------------------------
784 int main(int argc, char* argv[])
785 {
786 stdout_hline();
787 printf("Execution begins.\n");
788 stdout_hline();
789
790 if (argc == 1)
791 {
792 }
793 else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
794 {
795 option_dups(argv[2]);
796 }
797 else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
798 {
799 option_filterdups(argv[2]);
800 }
801 else if ((argc == 3) && (strcmp(argv[1], "dedupnopath") == 0))
802 {
803 //option_filterdups(argv[2]);
804 }
805 else if ((argc == 3) && (strcmp(argv[1], "dryrunnopath") == 0))
806 {
807 //option_filterdups(argv[2]);
808 }
809 else if ((argc == 4) && (strcmp(argv[1], "dedup") == 0))
810 {
811 option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
812 }
813 else if ((argc == 4) && (strcmp(argv[1], "dryrun") == 0))
814 {
815 option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME/10.0);
816 }
817 else
818 {
819 printf("Unrecognized parameter form. Try \"dedup\".\n");
820 }
821
822 //w_sleep(-3 /* UNLINKPAUSETIME*/ );
823
824 //stdout_hline();
825 printf("Execution ends.\n");
826 stdout_hline();
827
828 return 0;
829 }
830 //----------------------------------------------------------------------------------------------------
831

Properties

Name Value
svn:eol-style native

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25