/[dtapublic]/projs/dtats/trunk/projs/2016/20161007_ddeedduupp/win/ddeedduupp/ddeedduupp.cpp
ViewVC logotype

Annotation of /projs/dtats/trunk/projs/2016/20161007_ddeedduupp/win/ddeedduupp/ddeedduupp.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 310 - (hide annotations) (download)
Wed Jan 1 23:15:08 2020 UTC (4 years, 5 months ago) by dashley
File size: 27071 byte(s)
Remove extra file.
Place file contents in new project for analysis.
1 dashley 309 // ddeedduupp.cpp : This file contains the 'main' function. Program execution begins and ends there.
2     //
3    
4     #include <iostream>
5    
// Program entry point: writes a fixed greeting to standard output and reports success.
int main()
{
    static const char greeting[] = "Hello World!\n";

    std::cout << greeting;
    return 0;
}
10    
11     // Run program: Ctrl + F5 or Debug > Start Without Debugging menu
12     // Debug program: F5 or Debug > Start Debugging menu
13    
14     // Tips for Getting Started:
15     // 1. Use the Solution Explorer window to add/manage files
16     // 2. Use the Team Explorer window to connect to source control
17     // 3. Use the Output window to see build output and other messages
18     // 4. Use the Error List window to view errors
19     // 5. Go to Project > Add New Item to create new code files, or Project > Add Existing Item to add existing code files to the project
20     // 6. In the future, to open this project again, go to File > Open > Project and select the .sln file
21 dashley 310
22     #if 0
23     //----------------------------------------------------------------------------------------------------
24     //$Header$
25     //----------------------------------------------------------------------------------------------------
26     //qdedup.c
27     //----------------------------------------------------------------------------------------------------
28     //Quick and dirty program to eliminate duplicates from a file tree. A file containing the SHA512
29     //hashes of all the files to be considered must already exist, and must be regenerated each time the
30     //underlying files are deleted/added/modified, which means the file must be regenerated after each run
31     //of qdedup. (WARNING: IF YOU DO NOT REGENERATE THE FILE AFTER EACH RUN OF qdedup, YOU WILL
32     //PROBABLY DESTROY DATA. THE MECHANISM WOULD BE THAT THE SHA512 MANIFEST IMPLIES THAT DUPLICATES
33     //EXIST WHEN THEY NO LONGER DO, SO qdedup WILL ERRONEOUSLY DELETE THE LAST COPIES OF FILES.) The
34     //program will eliminate duplicates within a single specified directory or outside a single specified
35     //directory.
36     //
37     //This program will compile and run only on *nix systems and under Cygwin on Windows systems.
38     //----------------------------------------------------------------------------------------------------
39     //Copyright David T. Ashley (dashley@gmail.com), 2016.
40     //----------------------------------------------------------------------------------------------------
41     //Provided under the MIT LICENSE, reproduced immediately below.
42     //----------------------------------------------------------------------------------------------------
43     //Permission is hereby granted, free of charge, to any person obtaining a copy of
44     //this software and associated documentation files (the "Software"), to deal in the
45     //Software without restriction, including without limitation the rights to use,
46     //copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
47     //Software, and to permit persons to whom the Software is furnished to do so,
48     //subject to the following conditions:
49     //
50     //The above copyright notice and this permission notice shall be included in all
51     //copies or substantial portions of the Software.
52     //
53     //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
54     //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
55     //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
56     //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
57     //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
58     //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
59     //SOFTWARE.
60     //----------------------------------------------------------------------------------------------------
61     //All paths in the SHA512 file must be absolute or must be relative to the current working directory
62     //at the time this program is run.
63     //
64     //The recommended method to generate the SHA512 file is using the "-exec" option of the "find"
65     //command, i.e.
66     //
67     // find target_directory -type f -exec sha512sum {} \; >sha512sums.txt
68     //
69     //If any files are deleted by the program, a new SHA512 file must be generated before the program is
70     //run again to delete files. The reason for this restriction is that the program will never knowingly
71     //delete the last copy of a file. If the SHA512 file contains the digests of files that no longer
72     //exist, the program may unknowingly delete the last copies of files (because it believes based on
73     //the SHA512 file that other copies exist when in fact they do not).
74     //
75     //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
76     //
77     //This program is designed to compile and run under Cygwin or *nix only.
78     //
79     //Usage:
80     // qdedup
81     // Prints help information and exits.
82     // qdedup ndups <sha512file>
83     // Prints statistics about the number of duplicates in <sha512file>.
84     // qdedup filterdups <sha512file>
85     // Analyzes duplicates and prints the filenames of groups of duplicates. The output is designed
86     // for hand analysis so that insight can be gained into what duplicates exist and where they
87     // are located.
88     // qdedup dedup_preserve_inside <sha512file> <path>
89     // For each group of duplicates that exists, preserves the duplicates that exist within path
90     // and removes all others. If no copies of the duplicate exist within path, no copies of the
91     // duplicate will be removed.
92     // qdedup dryrun_preserve_inside <sha512file> <path>
93     // Exactly like "dedup_preserve_inside", except that no files will be deleted. Text will be
94     // output to explain what would be deleted by "dedup_preserve_inside".
95     // qdedup dedup_nopath <sha512file>
96     // For each group of duplicates that exists, preserves only the first (the one with the lowest
97     // sort-order filename).
98     // qdedup dryrun_nopath <sha512file>
99     // Exactly like "dedup_nopath", except that no files will be deleted. Text will be
100     // output to explain what would be deleted by "dedup_nopath".
101     // qdedup dedup_preserve_outside <sha512file> <path>
102     // For each group of duplicates that exists, deletes duplicates only from within the specified
103     // path. If any duplicates do not have at least one copy within <path> no instances of the
104     // duplicate are deleted.
105     // qdedup dryrun_preserve_outside <sha512file> <path>
106     // Exactly like "dedup_preserve_outside", except that no files will be deleted. Text will be
107     // output to explain what would be deleted by "dedup_preserve_outside".
108     //----------------------------------------------------------------------------------------------------
109     #include <math.h>
110     #include <stdio.h>
111     #include <stdlib.h>
112     #include <string.h>
113     #include <time.h>
114     #include <unistd.h>
115     //----------------------------------------------------------------------------------------------------
//Number of printable characters in a line.
#define LINELEN (78)

//The maximum number of characters that may be in a line of the SHA512 input file. This count
//includes the \0 terminator, so only this value minus 1 characters may be in a line.
#define MAXLINELEN (2000)

//Number of seconds to pause between file unlinks (deletions). This is designed to give the user
//time to abort the program if desired before catastrophic quantities of files are deleted.
#define UNLINKPAUSETIME (0.1)
123     //----------------------------------------------------------------------------------------------------
//One entry of the SHA512 input file: the character representation of an SHA512 hash, plus the
//filename that was listed with it.
typedef struct
{
    //Hash text: 512/4 = 128 characters, plus 1 character for the zero terminator.
    char hash[128 + 1];

    //Filename as specified in the file, allocated via the malloc() family.
    char* fname;
} tFileHashRecord;
133     //----------------------------------------------------------------------------------------------------
134     //----------------------------------------------------------------------------------------------------
135     //----- CHARACTER CLASSIFICATION FUNCTIONS ---------------------------------------------------------
136     //----------------------------------------------------------------------------------------------------
137     //----------------------------------------------------------------------------------------------------
//TRUE (1) if the character may appear in the hash field: a decimal digit or one of the
//lower-case letters 'a' through 'f'. FALSE (0) for anything else, including upper-case
//hex letters.
int is_valid_hash_char(char c)
{
    if ((c >= '0') && (c <= '9'))
    {
        return 1;
    }

    if ((c >= 'a') && (c <= 'f'))
    {
        return 1;
    }

    return 0;
}
166     //----------------------------------------------------------------------------------------------------
//TRUE (1) if the character is part of a newline sequence, i.e. has character code 10 or 13.
//FALSE (0) otherwise.
int is_newline_sequence_char(char c)
{
    return ((c == 10) || (c == 13)) ? 1 : 0;
}
181    
182     //----------------------------------------------------------------------------------------------------
183     //----------------------------------------------------------------------------------------------------
184     //----- FORMATTED OUTPUT FUNCTIONS -----------------------------------------------------------------
185     //----------------------------------------------------------------------------------------------------
186     //----------------------------------------------------------------------------------------------------
//Writes the character c to the stream s exactly n times. Writes nothing when n is 0.
//
void stream_rep_char(FILE* s, char c, unsigned n)
{
    unsigned written;

    for (written = 0; written < n; written++)
    {
        fprintf(s, "%c", c);
    }
}
196     //----------------------------------------------------------------------------------------------------
197     //Prints a horizontal line to a stream, including the newline.
198     //
199     void stream_hline(FILE* s)
200     {
201     stream_rep_char(s, '-', LINELEN);
202     fprintf(s, "\n");
203     }
204     //----------------------------------------------------------------------------------------------------
205     //Prints a horizontal line to a stdout, including the newline.
206     //
207     void stdout_hline(void)
208     {
209     stream_rep_char(stdout, '-', LINELEN);
210     fprintf(stdout, "\n");
211     }
212     //----------------------------------------------------------------------------------------------------
213     //----------------------------------------------------------------------------------------------------
214     //----- FATAL ERROR FUNCTIONS ----------------------------------------------------------------------
215     //----------------------------------------------------------------------------------------------------
216     //----------------------------------------------------------------------------------------------------
217     //Errors out fatally.
218     //
219     void fatal(const char* desc, const char* file, unsigned line)
220     {
221     stdout_hline();
222     printf("Fatal error: %s\n", desc);
223     printf("Source file: %s\n", file);
224     printf("Line : %u\n", line);
225     stdout_hline();
226     exit(1);
227     }
228     //----------------------------------------------------------------------------------------------------
229     //----------------------------------------------------------------------------------------------------
230     //----- MEMORY ALLOCATION WRAPPERS -----------------------------------------------------------------
231     //----------------------------------------------------------------------------------------------------
232     //----------------------------------------------------------------------------------------------------
233     //malloc() wrapper.
234     void* w_malloc(size_t nbytes)
235     {
236     void* rv;
237    
238     if (!nbytes)
239     {
240     fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
241     }
242    
243     rv = malloc(nbytes);
244    
245     if (!rv)
246     {
247     fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
248     }
249    
250     //Zero out, just for consistency.
251     memset(rv, 0, nbytes);
252     }
253     //----------------------------------------------------------------------------------------------------
254     //realloc() wrapper.
255     void* w_realloc(void* p, size_t n)
256     {
257     void* rv;
258    
259     if (!n)
260     {
261     fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
262     }
263    
264     if (!p)
265     {
266     fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
267     }
268    
269     rv = realloc(p, n);
270    
271     if (!rv)
272     {
273     fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
274     }
275     }
276     //----------------------------------------------------------------------------------------------------
277     //----------------------------------------------------------------------------------------------------
278     //----- SLEEP FUNCTIONS ----------------------------------------------------------------------------
279     //----------------------------------------------------------------------------------------------------
280     //----------------------------------------------------------------------------------------------------
281     //Sleep for a time, in seconds.
282     void w_sleep(double seconds)
283     {
284     struct timespec t;
285    
286     if (seconds < 0)
287     {
288     fatal("Sleep for negative time request.", __FILE__, __LINE__);
289     }
290     else if (seconds > 3600)
291     {
292     fatal("Sleep for too long request.", __FILE__, __LINE__);
293     }
294    
295     t.tv_sec = floor(seconds);
296     t.tv_nsec = (seconds - floor(seconds)) * 1E9;
297    
298     nanosleep(&t, NULL);
299     }
300     //----------------------------------------------------------------------------------------------------
301     //----------------------------------------------------------------------------------------------------
302     //----- SHA512 FIELD READ FUNCTIONS ----------------------------------------------------------------
303     //----------------------------------------------------------------------------------------------------
304     //----------------------------------------------------------------------------------------------------
305     //These functions read in an individual field of a standard SHA512 file generated using application
306     //of the standard sha512sum program.
307     //
308     //*rcode = 1, success.
309     // 0, legal end of file, record assigned.
310     void get_sha512file_line(FILE* s, int* rcode, tFileHashRecord* hash_rec)
311     {
312     unsigned bidx;
313     unsigned nchars;
314     int ic;
315     int exitflag;
316     int eoffound;
317     int eolfound;
318     char c;
319     char buf[MAXLINELEN];
320    
321     //Zero out the buffer. This handles string termination automatically.
322     memset(buf, 0, sizeof(buf));
323    
324     //Read characters into the buffer until either hit EOF, newline, or can't
325     //fill the buffer any longer.
326     eoffound = 0;
327     eolfound = 0;
328     exitflag = 0;
329     bidx = 0;
330     do
331     {
332     ic = fgetc(s);
333     c = ic;
334    
335     if (ic == EOF)
336     {
337     eoffound = 1;
338     eolfound = 0;
339     nchars = bidx;
340     exitflag = 1;
341     }
342     else if (is_newline_sequence_char(c))
343     {
344     eoffound = 0;
345     eolfound = 1;
346     nchars = bidx;
347     exitflag = 1;
348     }
349     else if (bidx >= (MAXLINELEN - 1))
350     {
351     fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
352     }
353     else
354     {
355     buf[bidx] = c;
356     bidx++;
357     exitflag = 0;
358     }
359     } while (!exitflag);
360    
361     //If we encountered a newline, inch past it. We may encounter an EOF.
362     if (eolfound)
363     {
364     exitflag = 0;
365     do
366     {
367     ic = fgetc(s);
368     c = ic;
369    
370     if (ic == EOF)
371     {
372     eoffound = 1;
373     eolfound = 0;
374     exitflag = 1;
375     }
376     else if (is_newline_sequence_char(c))
377     {
378     exitflag = 0;
379     }
380     else
381     {
382     //We hit the next line. Put the character back.
383     eoffound = 0;
384     eolfound = 1;
385     ungetc(ic, s);
386     exitflag = 1;
387     }
388     } while (!exitflag);
389     }
390    
391     //For better or worse, we have a \0-terminated line in the buffer.
392     //
393     //Zero the caller's area. This takes care of the hash terminator as well.
394     memset(hash_rec, 0, sizeof(*hash_rec));
395    
396     //Ensure that we have at least 128 characters, and they are all hex characters.
397     //Otherwise, we can't proceed.
398     if (nchars < 128)
399     {
400     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
401     }
402     else
403     {
404     for (bidx = 0; bidx < 128; bidx++)
405     {
406     if (!is_valid_hash_char(buf[bidx]))
407     {
408     fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
409     }
410     }
411     }
412    
413     //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
414     if (nchars < 130)
415     {
416     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
417     }
418     else if (buf[128] != ' ')
419     {
420     fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
421     }
422     else if (buf[129] != '*')
423     {
424     fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
425     }
426     // else if (buf[129] != ' ')
427     // {
428     // //130th character is ' '. Need to figure out why sometimes space and sometimes '*'.
429     // fatal("130th hash line character must be \" \".", __FILE__, __LINE__);
430     // }
431    
432     //There must be a 131'st character. Beyond that, we can't qualify, because filenames may
433     //have odd characters and may be of any length.
434     if (nchars < 131)
435     {
436     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
437     }
438    
439     //Copy the hash to the caller's area. The terminator has already been inserted.
440     memcpy(&(hash_rec->hash[0]), buf, 128);
441    
442     //Allocate space for the filename.
443     hash_rec->fname = w_malloc(strlen(buf + 130) + 1);
444    
445     //Make the copy.
446     strcpy(hash_rec->fname, buf + 130);
447    
448     if (eoffound)
449     * rcode = 0;
450     else
451     *rcode = 1;
452     }
453     //----------------------------------------------------------------------------------------------------
454     void parseinputfile(tFileHashRecord** parsed_recs, unsigned* count, char* fname)
455     {
456     FILE* s;
457     int rcode;
458    
459     //Try to open the file for reading. Inability is a failure.
460     s = fopen(fname, "r");
461     if (!s)
462     {
463     fatal("Hash file open failure.", __FILE__, __LINE__);
464     }
465    
466     //Start off with a count of 0 and a NULL pointer.
467     *count = 0;
468     *parsed_recs = NULL;
469    
470     do
471     {
472     //For the first time, allocate space for one record. Beyond that,
473     //expand it.
474     if (!*parsed_recs)
475     {
476     *parsed_recs = w_malloc(sizeof(tFileHashRecord));
477     }
478     else
479     {
480     *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
481     }
482    
483     //Parse and fill in the space.
484     get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
485    
486     //We now have one more.
487     (*count)++;
488     } while (rcode == 1);
489    
490     //Try to close the file. Inability is a failure.
491     if (fclose(s))
492     {
493     fatal("Hash file close failure.", __FILE__, __LINE__);
494     }
495     }
496     //----------------------------------------------------------------------------------------------------
497     int sortcmpascendinghash(const void* p0_in, const void* p1_in)
498     {
499     const tFileHashRecord* p0, * p1;
500    
501     p0 = p0_in;
502     p1 = p1_in;
503    
504     return(strcmp(p0->hash, p1->hash));
505     }
506    
507     //----------------------------------------------------------------------------------------------------
508     void sortinternaldsbyhash(tFileHashRecord* parsed_recs, unsigned count)
509     {
510     qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
511     }
512     //----------------------------------------------------------------------------------------------------
513     int sortcmpascendingfname(const void* p0_in, const void* p1_in)
514     {
515     const tFileHashRecord* p0, * p1;
516    
517     p0 = p0_in;
518     p1 = p1_in;
519    
520     return(strcmp(p0->fname, p1->fname));
521     }
522     //----------------------------------------------------------------------------------------------------
523     //This sort has to be run after the hash sort. Within groups of identical hashes, it sorts by
524     //ascending filename.
525     void sortinternalgroupfname(tFileHashRecord* parsed_recs, unsigned count)
526     {
527     unsigned ui;
528     unsigned i_group_min, i_group_max;
529    
530     if (!count)
531     return;
532    
533     i_group_min = 0;
534     i_group_max = 0;
535    
536     do
537     {
538     //Advance i_group_max to the end of the group of duplicates.
539     while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
540     {
541     i_group_max++;
542     }
543    
544     if (i_group_min != i_group_max)
545     {
546     //Sort the internal group.
547     qsort(parsed_recs + i_group_min,
548     i_group_max - i_group_min + 1,
549     sizeof(tFileHashRecord),
550     sortcmpascendingfname);
551     }
552    
553     //On to the next group.
554     i_group_max++;
555     i_group_min = i_group_max;
556    
557     } while (i_group_max < (count - 1));
558     }
559     //----------------------------------------------------------------------------------------------------
560     void printsinglerecord(tFileHashRecord* rec, unsigned elno)
561     {
562     printf("[%9u]\n", elno);
563     printf("Hash : %s\n", rec->hash);
564     printf("Filename : %s\n", rec->fname);
565     stdout_hline();
566     }
567     //----------------------------------------------------------------------------------------------------
568     void printinternalds(tFileHashRecord* parsed_recs, unsigned count)
569     {
570     unsigned i;
571    
572     for (i = 0; i < count; i++)
573     {
574     printsinglerecord(parsed_recs + i, i);
575     }
576     }
577     //----------------------------------------------------------------------------------------------------
578     void gather_dup_stats(tFileHashRecord* parsed_recs, unsigned count, unsigned* out_num_dups, unsigned* out_cumulative_dups)
579     {
580     unsigned i_group_min, i_group_max;
581    
582     *out_num_dups = 0;
583     *out_cumulative_dups = 0;
584    
585     if (!count)
586     return;
587    
588     i_group_min = 0;
589     i_group_max = 0;
590    
591     do
592     {
593     //Advance i_group_max to the end of the group of duplicates.
594     while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
595     {
596     i_group_max++;
597     }
598    
599     //Log the findings.
600     if (i_group_min != i_group_max)
601     {
602     (*out_num_dups)++;
603     (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
604     }
605    
606     //On to the next group.
607     i_group_max++;
608     i_group_min = i_group_max;
609    
610     } while (i_group_max < (count - 1));
611     }
612     //----------------------------------------------------------------------------------------------------
613     void option_dups(char* fname)
614     {
615     tFileHashRecord* parsed_recs;
616     unsigned count, num_dups, cumulative_dups;
617    
618     parseinputfile(&parsed_recs, &count, fname);
619     //printf("%u records parsed.\n", count);
620     sortinternaldsbyhash(parsed_recs, count);
621     sortinternalgroupfname(parsed_recs, count);
622     printinternalds(parsed_recs, count);
623     stdout_hline();
624     gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
625     printf("Number of duplicated files : %u\n", num_dups);
626     if (num_dups)
627     {
628     printf("Average number of duplicates: %.2f\n", (double)cumulative_dups / (double)num_dups);
629     }
630     }
631     //----------------------------------------------------------------------------------------------------
632     void option_filterdups(char* fname)
633     {
634     tFileHashRecord* parsed_recs;
635     unsigned dupgroup;
636     unsigned count;
637     unsigned ui;
638     unsigned i_group_min, i_group_max;
639    
640     parseinputfile(&parsed_recs, &count, fname);
641     //printf("%u records parsed.\n", count);
642     sortinternaldsbyhash(parsed_recs, count);
643     sortinternalgroupfname(parsed_recs, count);
644    
645     if (!count)
646     return;
647    
648     dupgroup = 0;
649     i_group_min = 0;
650     i_group_max = 0;
651    
652     do
653     {
654     //Advance i_group_max to the end of the group of duplicates.
655     while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
656     {
657     i_group_max++;
658     }
659    
660     //Print the findings.
661     if (i_group_min != i_group_max)
662     {
663     printf("Duplicate group %u:\n", dupgroup);
664     for (ui = i_group_min; ui <= i_group_max; ui++)
665     {
666     printf("%s\n", parsed_recs[ui].fname);
667     }
668    
669     dupgroup++;
670    
671     stdout_hline();
672     }
673    
674     //On to the next group.
675     i_group_max++;
676     i_group_min = i_group_max;
677    
678     } while (i_group_max < (count - 1));
679     }
680     //----------------------------------------------------------------------------------------------------
681     //Returns true if the filename is within the specified path, or false otherwise.
682     int is_path_member(const char* fname, const char* path)
683     {
684     if (strlen(fname) == 0)
685     {
686     fatal("Zero-length filename.", __FILE__, __LINE__);
687     }
688     else if (strlen(path) == 0)
689     {
690     fatal("Zero-length path.", __FILE__, __LINE__);
691     }
692     else if (path[strlen(path) - 1] != '/')
693     {
694     fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
695     }
696     else if (strlen(fname) <= strlen(path))
697     {
698     //Can't be in the path because filename is not longer than path name.
699     return 0;
700     }
701     else if (memcmp(fname, path, strlen(path)) == 0)
702     {
703     return 1;
704     }
705     else
706     {
707     return 0;
708     }
709     }
710     //----------------------------------------------------------------------------------------------------
711     void option_dedup(char* fname, char* path, int may_delete, double pause_time)
712     {
713     tFileHashRecord* parsed_recs;
714     unsigned dupgroup;
715     unsigned count;
716     unsigned ui;
717     unsigned within_path;
718     unsigned i_group_min, i_group_max;
719    
720     parseinputfile(&parsed_recs, &count, fname);
721     //printf("%u records parsed.\n", count);
722     sortinternaldsbyhash(parsed_recs, count);
723     sortinternalgroupfname(parsed_recs, count);
724    
725     if (!count)
726     return;
727    
728     dupgroup = 0;
729     i_group_min = 0;
730     i_group_max = 0;
731    
732     do
733     {
734     //Advance i_group_max to the end of the group of duplicates.
735     while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
736     {
737     i_group_max++;
738     }
739    
740     //If this is a group of duplicates.
741     if (i_group_min != i_group_max)
742     {
743     //Print the findings.
744     printf("Duplicate group %u:\n", dupgroup);
745     for (ui = i_group_min; ui <= i_group_max; ui++)
746     {
747     printf("%s\n", parsed_recs[ui].fname);
748     }
749    
750     dupgroup++;
751    
752     stdout_hline();
753    
754     //Count how many of the group of duplicates are within the supplied path.
755     within_path = 0;
756     for (ui = i_group_min; ui <= i_group_max; ui++)
757     {
758     if (is_path_member(parsed_recs[ui].fname, path))
759     {
760     within_path++;
761     }
762     }
763    
764     //We have to take different actions based on whether we do or don't have any within path.
765     //If we don't have any, we may delete nothing.
766     if (!within_path)
767     {
768     printf("None of these duplicates in path--taking no action.\n");
769     //stdout_hline();
770     }
771     else
772     {
773     for (ui = i_group_min; ui <= i_group_max; ui++)
774     {
775     if (is_path_member(parsed_recs[ui].fname, path))
776     {
777     printf("Not deleting: %s\n", parsed_recs[ui].fname);
778     }
779     else
780     {
781     printf("Deleting : %s\n", parsed_recs[ui].fname);
782     if (may_delete)
783     {
784     if (!unlink(parsed_recs[ui].fname))
785     {
786     printf(" File deleted (unlinked) successfully.\n");
787     }
788     else
789     {
790     printf(" Failure attempting to delete (unlink) file.\n");
791     }
792     }
793     else
794     {
795     printf(" Dry run only.\n");
796     }
797     }
798    
799     //w_sleep(pause_time);
800     }
801     }
802    
803     stdout_hline();
804     }
805    
806     //On to the next group.
807     i_group_max++;
808     i_group_min = i_group_max;
809    
810     } while (i_group_max < (count - 1));
811     }
812     //----------------------------------------------------------------------------------------------------
813     int main(int argc, char* argv[])
814     {
815     stdout_hline();
816     printf("Execution begins.\n");
817     stdout_hline();
818    
819     if (argc == 1)
820     {
821     }
822     else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
823     {
824     option_dups(argv[2]);
825     }
826     else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
827     {
828     option_filterdups(argv[2]);
829     }
830     else if ((argc == 3) && (strcmp(argv[1], "dedup_nopath") == 0))
831     {
832     //option_filterdups(argv[2]);
833     }
834     else if ((argc == 3) && (strcmp(argv[1], "dryrun_nopath") == 0))
835     {
836     //option_filterdups(argv[2]);
837     }
838     else if ((argc == 4) && (strcmp(argv[1], "dedup_preserve_inside") == 0))
839     {
840     option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
841     }
842     else if ((argc == 4) && (strcmp(argv[1], "dryrun_preserve_inside") == 0))
843     {
844     option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME / 10.0);
845     }
846     else
847     {
848     printf("Unrecognized parameter form. Try \"dedup\".\n");
849     }
850    
851     //w_sleep(-3 /* UNLINKPAUSETIME*/ );
852    
853     //stdout_hline();
854     printf("Execution ends.\n");
855     stdout_hline();
856    
857     return 0;
858     }
859     //----------------------------------------------------------------------------------------------------
860     #endif

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision URL Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25