/[dtapublic]/projs/trunk/projs/20161007_dedup/qdedup.c
ViewVC logotype

Annotation of /projs/trunk/projs/20161007_dedup/qdedup.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 74 - (hide annotations) (download)
Sat Nov 5 16:51:05 2016 UTC (8 years, 1 month ago) by dashley
File MIME type: text/plain
File size: 28099 byte(s)
Documentation cleanup.
1 dashley 71 //----------------------------------------------------------------------------------------------------
2     //qdedup.c
3     //----------------------------------------------------------------------------------------------------
4     //Quick and dirty program to eliminate duplicates from a file tree. A file containing the SHA512
5 dashley 74 //hashes of all the files to be considered must already exist, and must be regenerated each time the
6     //underlying files are deleted/added/modified, which means the file must be regenerated after each run
7     //of qdedup. (WARNING: IF YOU DO NOT REGENERATE THE FILE AFTER EACH RUN OF qdedup, YOU WILL
8     //PROBABLY DESTROY DATA. THE MECHANISM WOULD BE THAT THE SHA512 MANIFEST IMPLIES THAT DUPLICATES
9     //EXIST WHEN THEY NO LONGER DO, SO qdedup WILL ERRONEOUSLY DELETE THE LAST COPIES OF FILES.) The
10     //program will eliminate duplicates within a single specified directory or outside a single specified
11     //directory.
12     //
13     //This program will compile and run only on *nix systems and under Cygwin on Windows systems.
14 dashley 71 //----------------------------------------------------------------------------------------------------
15 dashley 74 //Copyright David T. Ashley (dashley@gmail.com), 2016.
16 dashley 71 //----------------------------------------------------------------------------------------------------
17 dashley 74 //Provided under the MIT LICENSE, reproduced immediately below.
18     //----------------------------------------------------------------------------------------------------
19     //Permission is hereby granted, free of charge, to any person obtaining a copy of
20     //this software and associated documentation files (the "Software"), to deal in the
21     //Software without restriction, including without limitation the rights to use,
22     //copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
23     //Software, and to permit persons to whom the Software is furnished to do so,
24     //subject to the following conditions:
25 dashley 71 //
26 dashley 74 //The above copyright notice and this permission notice shall be included in all
27     //copies or substantial portions of the Software.
28 dashley 71 //
29 dashley 74 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30     //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31     //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32     //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33     //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34     //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35     //SOFTWARE.
36 dashley 71 //----------------------------------------------------------------------------------------------------
37     //All paths in the SHA512 file must be absolute or must be relative to the current working directory
38     //at the time this program is run.
39     //
40 dashley 74 //The recommended method to generate the SHA512 file is using the "-exec" option of the "find"
41     //command, i.e.
42     //
43     // find target_directory -type f -exec sha512sum {} \; >sha512sums.txt
44     //
45 dashley 71 //If any files are deleted by the program, a new SHA512 file must be generated before the program is
46     //run again to delete files. The reason for this restriction is that the program will never knowingly
47     //delete the last copy of a file. If the SHA512 file contains the digests of files that no longer
48     //exist, the program may unknowingly delete the last copies of files (because it believes based on
49     //the SHA512 file that other copies exist when in fact they do not).
50     //
51     //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
52     //
53     //This program is designed to compile and run under Cygwin or *nix only.
54     //
55     //Usage:
56     // qdedup
57     // Prints help information and exits.
58     // qdedup ndups <sha512file>
59     // Prints statistics about the number of duplicates in <sha512file>.
60     // qdedup filterdups <sha512file>
61     // Analyzes duplicates and prints the filenames of groups of duplicates. The output is designed
62     // for hand analysis so that insight can be gained into what duplicates exist and where they
63     // are located.
64 dashley 74 // qdedup dedup_preserve_inside <sha512file> <path>
65 dashley 71 // For each group of duplicates that exists, preserves the duplicates that exist within path
66     // and removes all others. If no copies of the duplicate exist within path, no copies of the
67     // duplicate will be removed.
68 dashley 74 // qdedup dryrun_preserve_inside <sha512file> <path>
69     // Exactly like "dedup_preserve_inside", except that no files will be deleted. Text will be
70     // output to explain what would be deleted by "dedup_preserve_inside".
71     // qdedup dedup_nopath <sha512file>
72 dashley 71 // For each group of duplicates that exists, preserves only the first (the one with lowest
73     // sort-order filename).
74 dashley 74 // qdedup dryrun_nopath <sha512file>
75     // Exactly like "dedup_nopath", except that no files will be deleted. Text will be
76     // output to explain what would be deleted by "dedup_nopath".
77     // qdedup dedup_preserve_outside <sha512file> <path>
78     // For each group of duplicates that exists, deletes duplicates only from within the specified
79     // path. If any duplicates do not have at least one copy within <path> no instances of the
80     // duplicate are deleted.
81     // qdedup dryrun_preserve_outside <sha512file> <path>
82     // Exactly like "dedup_preserve_outside", except that no files will be deleted. Text will be
83     // output to explain what would be deleted by "dedup_preserve_outside".
84 dashley 71 //----------------------------------------------------------------------------------------------------
85     #include <math.h>
86     #include <stdio.h>
87     #include <stdlib.h>
88     #include <string.h>
89     #include <time.h>
90     #include <unistd.h>
91     //----------------------------------------------------------------------------------------------------
//Number of printable characters in a line. This is the width of the horizontal rules that the
//formatted output functions print.
#define LINELEN         (78)

//The maximum number of characters that may be in a line of the SHA512 input file. This count
//includes the \0 terminator, so only this value minus 1 characters may be in a line.
#define MAXLINELEN      (2000)

//Number of seconds to pause between file unlinks (deletions). This is designed to give the user
//time to abort the program if desired before catastrophic quantities of files are deleted.
#define UNLINKPAUSETIME (0.1)
99     //----------------------------------------------------------------------------------------------------
//One parsed line of the SHA512 file: the character representation of an SHA512 hash, plus the
//filename that was specified on the same line.
typedef struct
{
   //Hash text: 512/4 = 128 characters for the hash, plus 1 character for the zero terminator.
   char hash[128 + 1];

   //Filename as specified in the file, allocated via the malloc() family.
   char *fname;
} tFileHashRecord;
109     //----------------------------------------------------------------------------------------------------
110     //----------------------------------------------------------------------------------------------------
111     //----- CHARACTER CLASSIFICATION FUNCTIONS ---------------------------------------------------------
112     //----------------------------------------------------------------------------------------------------
113     //----------------------------------------------------------------------------------------------------
//Returns 1 if the character may appear in the hash portion of a line (a decimal digit or one of the
//lowercase letters "a" through "f"), or 0 otherwise. Uppercase hexadecimal letters are not accepted.
int is_valid_hash_char(char c)
{
   const int is_digit     = (c >= '0') && (c <= '9');
   const int is_lower_hex = (c >= 'a') && (c <= 'f');

   return(is_digit || is_lower_hex);
}
142     //----------------------------------------------------------------------------------------------------
//Returns 1 if the character is part of a newline sequence (character code 10 or 13), or 0 otherwise.
int is_newline_sequence_char(char c)
{
   return((c == 10) || (c == 13));
}
157    
158     //----------------------------------------------------------------------------------------------------
159     //----------------------------------------------------------------------------------------------------
160     //----- FORMATTED OUTPUT FUNCTIONS -----------------------------------------------------------------
161     //----------------------------------------------------------------------------------------------------
162     //----------------------------------------------------------------------------------------------------
//Writes the character c to the stream s exactly n times. Nothing is written when n is 0.
//
void stream_rep_char(FILE *s, char c, unsigned n)
{
   unsigned remaining;

   for (remaining = n; remaining > 0; remaining--)
   {
      fputc(c, s);
   }
}
172     //----------------------------------------------------------------------------------------------------
173     //Prints a horizontal line to a stream, including the newline.
174     //
175     void stream_hline(FILE *s)
176     {
177     stream_rep_char(s, '-', LINELEN);
178     fprintf(s, "\n");
179     }
180     //----------------------------------------------------------------------------------------------------
//Prints a horizontal line to stdout, including the newline.
//
void stdout_hline(void)
{
   stream_hline(stdout);
}
188     //----------------------------------------------------------------------------------------------------
189     //----------------------------------------------------------------------------------------------------
190     //----- FATAL ERROR FUNCTIONS ----------------------------------------------------------------------
191     //----------------------------------------------------------------------------------------------------
192     //----------------------------------------------------------------------------------------------------
//Reports a fatal error on stdout, framed by horizontal lines, then terminates the program with exit
//status 1. This function does not return.
//
//desc  Description of the error.
//file  Name of the source file reporting the error (callers pass __FILE__).
//line  Source line reporting the error (callers pass __LINE__).
//
void fatal(const char *desc, const char *file, unsigned line)
{
   stdout_hline();
   printf("Fatal error: %s\n"
          "Source file: %s\n"
          "Line : %u\n",
          desc, file, line);
   stdout_hline();

   exit(1);
}
204     //----------------------------------------------------------------------------------------------------
205     //----------------------------------------------------------------------------------------------------
206     //----- MEMORY ALLOCATION WRAPPERS -----------------------------------------------------------------
207     //----------------------------------------------------------------------------------------------------
208     //----------------------------------------------------------------------------------------------------
//malloc() wrapper that either succeeds or terminates the program.
//
//nbytes  Number of bytes to allocate. A request for 0 bytes is treated as a fatal error.
//
//Returns a pointer to nbytes bytes of zero-filled memory. If the allocation fails the program is
//terminated through fatal(), so the caller never receives a NULL pointer.
void *w_malloc(size_t nbytes)
{
   void *rv;

   if (!nbytes)
   {
      fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
   }

   rv = malloc(nbytes);

   if (!rv)
   {
      fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
   }

   //Zero out, just for consistency.
   memset(rv, 0, nbytes);

   //Hand the block back to the caller. Without this return statement the caller would use an
   //indeterminate pointer value.
   return(rv);
}
229     //----------------------------------------------------------------------------------------------------
//realloc() wrapper that either succeeds or terminates the program.
//
//p  Pointer to the existing allocation. A NULL pointer is treated as a fatal error.
//n  New size in bytes. A request for 0 bytes is treated as a fatal error.
//
//Returns the pointer to the resized block, which may differ from p. If the reallocation fails the
//program is terminated through fatal(), so the caller never receives a NULL pointer. Any bytes
//added by growing the block are not zeroed.
void *w_realloc(void *p, size_t n)
{
   void *rv;

   if (!n)
   {
      fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
   }

   if (!p)
   {
      fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
   }

   rv = realloc(p, n);

   if (!rv)
   {
      fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
   }

   //Hand the (possibly moved) block back to the caller. Without this return statement the caller
   //would use an indeterminate pointer value.
   return(rv);
}
252     //----------------------------------------------------------------------------------------------------
253     //----------------------------------------------------------------------------------------------------
254     //----- SLEEP FUNCTIONS ----------------------------------------------------------------------------
255     //----------------------------------------------------------------------------------------------------
256     //----------------------------------------------------------------------------------------------------
//Sleeps for the requested time, in seconds (fractions of a second are honored).
//
//A negative time, or a time longer than 3600 seconds, is a fatal error. The return value of
//nanosleep() is not examined.
void w_sleep(double seconds)
{
   struct timespec req;
   double whole_seconds;

   //fatal() terminates the program, so these range checks act as guards.
   if (seconds < 0)
   {
      fatal("Sleep for negative time request.", __FILE__, __LINE__);
   }
   if (seconds > 3600)
   {
      fatal("Sleep for too long request.", __FILE__, __LINE__);
   }

   //Split the time into whole seconds and the remaining fraction expressed in nanoseconds.
   whole_seconds = floor(seconds);
   req.tv_sec    = whole_seconds;
   req.tv_nsec   = (seconds - whole_seconds) * 1E9;

   nanosleep(&req, NULL);
}
276     //----------------------------------------------------------------------------------------------------
277     //----------------------------------------------------------------------------------------------------
278     //----- SHA512 FIELD READ FUNCTIONS ----------------------------------------------------------------
279     //----------------------------------------------------------------------------------------------------
280     //----------------------------------------------------------------------------------------------------
281     //These functions read in an individual field of a standard SHA512 file generated using application
282     //of the standard sha512sum program.
283     //
284     //*rcode = 1, success.
285     // 0, legal end of file, record assigned.
286     void get_sha512file_line(FILE *s, int *rcode, tFileHashRecord *hash_rec)
287     {
288     unsigned bidx;
289     unsigned nchars;
290     int ic;
291     int exitflag;
292     int eoffound;
293     int eolfound;
294     char c;
295     char buf[MAXLINELEN];
296    
297     //Zero out the buffer. This handles string termination automatically.
298     memset(buf, 0, sizeof(buf));
299    
300     //Read characters into the buffer until either hit EOF, newline, or can't
301     //fill the buffer any longer.
302     eoffound = 0;
303     eolfound = 0;
304     exitflag = 0;
305     bidx = 0;
306     do
307     {
308     ic = fgetc(s);
309     c = ic;
310    
311     if (ic == EOF)
312     {
313     eoffound = 1;
314     eolfound = 0;
315     nchars = bidx;
316     exitflag = 1;
317     }
318     else if (is_newline_sequence_char(c))
319     {
320     eoffound = 0;
321     eolfound = 1;
322     nchars = bidx;
323     exitflag = 1;
324     }
325     else if (bidx >= (MAXLINELEN - 1))
326     {
327     fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
328     }
329     else
330     {
331     buf[bidx] = c;
332     bidx++;
333     exitflag = 0;
334     }
335     } while(! exitflag);
336    
337     //If we encountered a newline, inch past it. We may encounter an EOF.
338     if (eolfound)
339     {
340     exitflag = 0;
341     do
342     {
343     ic = fgetc(s);
344     c = ic;
345    
346     if (ic == EOF)
347     {
348     eoffound = 1;
349     eolfound = 0;
350     exitflag = 1;
351     }
352     else if (is_newline_sequence_char(c))
353     {
354     exitflag = 0;
355     }
356     else
357     {
358     //We hit the next line. Put the character back.
359     eoffound = 0;
360     eolfound = 1;
361     ungetc(ic, s);
362     exitflag = 1;
363     }
364     } while(! exitflag);
365     }
366    
367     //For better or worse, we have a \0-terminated line in the buffer.
368     //
369     //Zero the caller's area. This takes care of the hash terminator as well.
370     memset(hash_rec, 0, sizeof(*hash_rec));
371    
372     //Ensure that we have at least 128 characters, and they are all hex characters.
373     //Otherwise, we can't proceed.
374     if (nchars < 128)
375     {
376     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
377     }
378     else
379     {
380     for (bidx = 0; bidx < 128; bidx++)
381     {
382     if (! is_valid_hash_char(buf[bidx]))
383     {
384     fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
385     }
386     }
387     }
388    
389     //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
390     if (nchars < 130)
391     {
392     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
393     }
394     else if (buf[128] != ' ')
395     {
396     fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
397     }
398     else if (buf[129] != '*')
399     {
400     fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
401     }
402    
403     //There must be a 131'st character. Beyond that, we can't qualify, because filenames may
404     //have odd characters and may be of any length.
405     if (nchars < 131)
406     {
407     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
408     }
409    
410     //Copy the hash to the caller's area. The terminator has already been inserted.
411     memcpy(&(hash_rec->hash[0]), buf, 128);
412    
413     //Allocate space for the filename.
414     hash_rec->fname = w_malloc(strlen(buf+130) + 1);
415    
416     //Make the copy.
417     strcpy(hash_rec->fname, buf+130);
418    
419     if (eoffound)
420     *rcode = 0;
421     else
422     *rcode = 1;
423     }
424     //----------------------------------------------------------------------------------------------------
425     void parseinputfile(tFileHashRecord **parsed_recs, unsigned *count, char *fname)
426     {
427     FILE *s;
428     int rcode;
429    
430     //Try to open the file for reading. Inability is a failure.
431     s = fopen(fname, "r");
432     if (!s)
433     {
434     fatal("Hash file open failure.", __FILE__, __LINE__);
435     }
436    
437     //Start off with a count of 0 and a NULL pointer.
438     *count = 0;
439     *parsed_recs = NULL;
440    
441     do
442     {
443     //For the first time, allocate space for one record. Beyond that,
444     //expand it.
445     if (! *parsed_recs)
446     {
447     *parsed_recs = w_malloc(sizeof(tFileHashRecord));
448     }
449     else
450     {
451     *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
452     }
453    
454     //Parse and fill in the space.
455     get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
456    
457     //We now have one more.
458     (*count)++;
459     } while(rcode == 1);
460    
461     //Try to close the file. Inability is a failure.
462     if (fclose(s))
463     {
464     fatal("Hash file close failure.", __FILE__, __LINE__);
465     }
466     }
467     //----------------------------------------------------------------------------------------------------
468     int sortcmpascendinghash(const void *p0_in, const void *p1_in)
469     {
470     const tFileHashRecord *p0, *p1;
471    
472     p0 = p0_in;
473     p1 = p1_in;
474    
475     return(strcmp(p0->hash, p1->hash));
476     }
477    
478     //----------------------------------------------------------------------------------------------------
479     void sortinternaldsbyhash(tFileHashRecord *parsed_recs, unsigned count)
480     {
481     qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
482     }
483     //----------------------------------------------------------------------------------------------------
484     int sortcmpascendingfname(const void *p0_in, const void *p1_in)
485     {
486     const tFileHashRecord *p0, *p1;
487    
488     p0 = p0_in;
489     p1 = p1_in;
490    
491     return(strcmp(p0->fname, p1->fname));
492     }
493     //----------------------------------------------------------------------------------------------------
494     //This sort has to be run after the hash sort. Within groups of identical hashes, it sorts by
495     //ascending filename.
496     void sortinternalgroupfname(tFileHashRecord *parsed_recs, unsigned count)
497     {
498     unsigned ui;
499     unsigned i_group_min, i_group_max;
500    
501     if (! count)
502     return;
503    
504     i_group_min = 0;
505     i_group_max = 0;
506    
507     do
508     {
509     //Advance i_group_max to the end of the group of duplicates.
510     while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
511     {
512     i_group_max++;
513     }
514    
515     if (i_group_min != i_group_max)
516     {
517     //Sort the internal group.
518     qsort(parsed_recs + i_group_min,
519     i_group_max - i_group_min + 1,
520     sizeof(tFileHashRecord),
521     sortcmpascendingfname);
522     }
523    
524     //On to the next group.
525     i_group_max++;
526     i_group_min = i_group_max;
527    
528     } while (i_group_max < (count - 1));
529     }
530     //----------------------------------------------------------------------------------------------------
531     void printsinglerecord(tFileHashRecord *rec, unsigned elno)
532     {
533     printf("[%9u]\n", elno);
534     printf("Hash : %s\n", rec->hash);
535     printf("Filename : %s\n", rec->fname);
536     stdout_hline();
537     }
538     //----------------------------------------------------------------------------------------------------
539     void printinternalds(tFileHashRecord *parsed_recs, unsigned count)
540     {
541     unsigned i;
542    
543     for (i=0; i<count; i++)
544     {
545     printsinglerecord(parsed_recs + i, i);
546     }
547     }
548     //----------------------------------------------------------------------------------------------------
549     void gather_dup_stats(tFileHashRecord *parsed_recs, unsigned count, unsigned *out_num_dups, unsigned *out_cumulative_dups)
550     {
551     unsigned i_group_min, i_group_max;
552    
553     *out_num_dups = 0;
554     *out_cumulative_dups = 0;
555    
556     if (! count)
557     return;
558    
559     i_group_min = 0;
560     i_group_max = 0;
561    
562     do
563     {
564     //Advance i_group_max to the end of the group of duplicates.
565     while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
566     {
567     i_group_max++;
568     }
569    
570     //Log the findings.
571     if (i_group_min != i_group_max)
572     {
573     (*out_num_dups)++;
574     (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
575     }
576    
577     //On to the next group.
578     i_group_max++;
579     i_group_min = i_group_max;
580    
581     } while (i_group_max < (count - 1));
582     }
583     //----------------------------------------------------------------------------------------------------
584     void option_dups(char *fname)
585     {
586     tFileHashRecord *parsed_recs;
587     unsigned count, num_dups, cumulative_dups;
588    
589     parseinputfile(&parsed_recs, &count, fname);
590     //printf("%u records parsed.\n", count);
591     sortinternaldsbyhash(parsed_recs, count);
592     sortinternalgroupfname(parsed_recs, count);
593     printinternalds(parsed_recs, count);
594     stdout_hline();
595     gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
596     printf("Number of duplicated files : %u\n", num_dups);
597     if (num_dups)
598     {
599     printf("Average number of duplicates: %.2f\n", (double)cumulative_dups/(double)num_dups);
600     }
601     }
602     //----------------------------------------------------------------------------------------------------
603     void option_filterdups(char *fname)
604     {
605     tFileHashRecord *parsed_recs;
606     unsigned dupgroup;
607     unsigned count;
608     unsigned ui;
609     unsigned i_group_min, i_group_max;
610    
611     parseinputfile(&parsed_recs, &count, fname);
612     //printf("%u records parsed.\n", count);
613     sortinternaldsbyhash(parsed_recs, count);
614     sortinternalgroupfname(parsed_recs, count);
615    
616     if (! count)
617     return;
618    
619     dupgroup = 0;
620     i_group_min = 0;
621     i_group_max = 0;
622    
623     do
624     {
625     //Advance i_group_max to the end of the group of duplicates.
626     while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
627     {
628     i_group_max++;
629     }
630    
631     //Print the findings.
632     if (i_group_min != i_group_max)
633     {
634     printf("Duplicate group %u:\n", dupgroup);
635     for (ui = i_group_min; ui <= i_group_max; ui++)
636     {
637     printf("%s\n", parsed_recs[ui].fname);
638     }
639    
640     dupgroup++;
641    
642     stdout_hline();
643     }
644    
645     //On to the next group.
646     i_group_max++;
647     i_group_min = i_group_max;
648    
649     } while (i_group_max < (count - 1));
650     }
651     //----------------------------------------------------------------------------------------------------
//Returns 1 if the filename is within the specified path, or 0 otherwise. Membership is decided
//purely by text: the filename must be longer than the path and must begin with the path.
//
//A zero-length filename, a zero-length path, or a path that does not end with a forward slash is a
//fatal error.
int is_path_member(const char *fname, const char *path)
{
   const size_t fname_len = strlen(fname);
   const size_t path_len  = strlen(path);

   //fatal() terminates the program, so these checks act as guards.
   if (fname_len == 0)
   {
      fatal("Zero-length filename.", __FILE__, __LINE__);
   }
   if (path_len == 0)
   {
      fatal("Zero-length path.", __FILE__, __LINE__);
   }
   if (path[path_len - 1] != '/')
   {
      fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
   }

   //Can't be in the path if the filename is not longer than the path name.
   if (fname_len <= path_len)
   {
      return 0;
   }

   return (memcmp(fname, path, path_len) == 0) ? 1 : 0;
}
681     //----------------------------------------------------------------------------------------------------
682     void option_dedup(char *fname, char *path, int may_delete, double pause_time)
683     {
684     tFileHashRecord *parsed_recs;
685     unsigned dupgroup;
686     unsigned count;
687     unsigned ui;
688     unsigned within_path;
689     unsigned i_group_min, i_group_max;
690    
691     parseinputfile(&parsed_recs, &count, fname);
692     //printf("%u records parsed.\n", count);
693     sortinternaldsbyhash(parsed_recs, count);
694     sortinternalgroupfname(parsed_recs, count);
695    
696     if (! count)
697     return;
698    
699     dupgroup = 0;
700     i_group_min = 0;
701     i_group_max = 0;
702    
703     do
704     {
705     //Advance i_group_max to the end of the group of duplicates.
706     while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
707     {
708     i_group_max++;
709     }
710    
711     //If this is a group of duplicates.
712     if (i_group_min != i_group_max)
713     {
714     //Print the findings.
715     printf("Duplicate group %u:\n", dupgroup);
716     for (ui = i_group_min; ui <= i_group_max; ui++)
717     {
718     printf("%s\n", parsed_recs[ui].fname);
719     }
720    
721     dupgroup++;
722    
723     stdout_hline();
724    
725     //Count how many of the group of duplicates are within the supplied path.
726     within_path = 0;
727     for (ui = i_group_min; ui <= i_group_max; ui++)
728     {
729     if (is_path_member(parsed_recs[ui].fname, path))
730     {
731     within_path++;
732     }
733     }
734    
735     //We have to take different actions based on whether we do or don't have any within path.
736     //If we don't have any, we may delete nothing.
737     if (! within_path)
738     {
739     printf("None of these duplicates in path--taking no action.\n");
740     //stdout_hline();
741     }
742     else
743     {
744     for (ui = i_group_min; ui <= i_group_max; ui++)
745     {
746     if (is_path_member(parsed_recs[ui].fname, path))
747     {
748     printf("Not deleting: %s\n", parsed_recs[ui].fname);
749     }
750     else
751     {
752     printf("Deleting : %s\n", parsed_recs[ui].fname);
753     if (may_delete)
754     {
755     if (! unlink(parsed_recs[ui].fname))
756     {
757     printf(" File deleted (unlinked) successfully.\n");
758     }
759     else
760     {
761     printf(" Failure attempting to delete (unlink) file.\n");
762     }
763     }
764     else
765     {
766     printf(" Dry run only.\n");
767     }
768     }
769    
770     //w_sleep(pause_time);
771     }
772     }
773    
774     stdout_hline();
775     }
776    
777     //On to the next group.
778     i_group_max++;
779     i_group_min = i_group_max;
780    
781     } while (i_group_max < (count - 1));
782     }
783     //----------------------------------------------------------------------------------------------------
784     int main(int argc, char* argv[])
785     {
786     stdout_hline();
787     printf("Execution begins.\n");
788     stdout_hline();
789    
790     if (argc == 1)
791     {
792     }
793     else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
794     {
795     option_dups(argv[2]);
796     }
797     else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
798     {
799     option_filterdups(argv[2]);
800     }
801     else if ((argc == 3) && (strcmp(argv[1], "dedupnopath") == 0))
802     {
803     //option_filterdups(argv[2]);
804     }
805     else if ((argc == 3) && (strcmp(argv[1], "dryrunnopath") == 0))
806     {
807     //option_filterdups(argv[2]);
808     }
809     else if ((argc == 4) && (strcmp(argv[1], "dedup") == 0))
810     {
811     option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
812     }
813     else if ((argc == 4) && (strcmp(argv[1], "dryrun") == 0))
814     {
815     option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME/10.0);
816     }
817     else
818     {
819     printf("Unrecognized parameter form. Try \"dedup\".\n");
820     }
821    
822     //w_sleep(-3 /* UNLINKPAUSETIME*/ );
823    
824     //stdout_hline();
825     printf("Execution ends.\n");
826     stdout_hline();
827    
828     return 0;
829     }
830     //----------------------------------------------------------------------------------------------------
831    

Properties

Name Value
svn:eol-style native

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25