/[dtapublic]/projs/dtats/trunk/projs/2016/20161007_ddeedduupp/win/ddeedduupp/ddeedduupp.cpp
ViewVC logotype

Diff of /projs/dtats/trunk/projs/2016/20161007_ddeedduupp/win/ddeedduupp/ddeedduupp.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 309 by dashley, Wed Jan 1 23:06:00 2020 UTC revision 310 by dashley, Wed Jan 1 23:15:08 2020 UTC
# Line 18  int main() Line 18  int main()
18  //   4. Use the Error List window to view errors  //   4. Use the Error List window to view errors
19  //   5. Go to Project > Add New Item to create new code files, or Project > Add Existing Item to add existing code files to the project  //   5. Go to Project > Add New Item to create new code files, or Project > Add Existing Item to add existing code files to the project
20  //   6. In the future, to open this project again, go to File > Open > Project and select the .sln file  //   6. In the future, to open this project again, go to File > Open > Project and select the .sln file
21    
22    #if 0
23    //----------------------------------------------------------------------------------------------------
24    //$Header$
25    //----------------------------------------------------------------------------------------------------
26    //qdedup.c
27    //----------------------------------------------------------------------------------------------------
28    //Quick and dirty program to eliminate duplicates from a file tree.  A file containing the SHA512
29    //hashes of all the files to be considered must already exist, and must be regenerated each time the
30    //underlying files are deleted/added/modified, which means the file must be regenerated after each run
31    //of qdedup.  (WARNING:  IF YOU DO NOT REGENERATE THE FILE AFTER EACH RUN OF qdedup, YOU WILL
32    //PROBABLY DESTROY DATA.  THE MECHANISM WOULD BE THAT THE SHA512 MANIFEST IMPLIES THAT DUPLICATES
33    //EXIST WHEN THEY NO LONGER DO, SO qdedup WILL ERRONEOUSLY DELETE THE LAST COPIES OF FILES.)  The
34    //program will eliminate duplicates within a single specified directory or outside a single specified
35    //directory.
36    //
37    //This program will compile and run only on *nix systems and under Cygwin on Windows systems.
38    //----------------------------------------------------------------------------------------------------
39    //Copyright David T. Ashley (dashley@gmail.com), 2016.
40    //----------------------------------------------------------------------------------------------------
41    //Provided under the MIT LICENSE, reproduced immediately below.
42    //----------------------------------------------------------------------------------------------------
43    //Permission is hereby granted, free of charge, to any person obtaining a copy of
44    //this software and associated documentation files (the "Software"), to deal in the
45    //Software without restriction, including without limitation the rights to use,
46    //copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
47    //Software, and to permit persons to whom the Software is furnished to do so,
48    //subject to the following conditions:
49    //
50    //The above copyright notice and this permission notice shall be included in all
51    //copies or substantial portions of the Software.
52    //
53    //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
54    //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
55    //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
56    //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
57    //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
58    //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
59    //SOFTWARE.
60    //----------------------------------------------------------------------------------------------------
61    //All paths in the SHA512 file must be absolute or must be relative to the current working directory
62    //at the time this program is run.
63    //
64    //The recommended method to generate the SHA512 file is using the "-exec" option of the "find"
65    //command, i.e.
66    //
67    //   find target_directory -type f -exec sha512sum {} \; >sha512sums.txt
68    //
69    //If any files are deleted by the program, a new SHA512 file must be generated before the program is
70    //run again to delete files.  The reason for this restriction is that the program will never knowingly
71    //delete the last copy of a file.  If the SHA512 file contains the digests of files that no longer
72    //exist, the program may unknowingly delete the last copies of files (because it believes based on
73    //the SHA512 file that other copies exist when in fact they do not).
74    //
75    //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
76    //
77    //This program is designed to compile and run under Cygwin or *nix only.
78    //
79    //Usage:
80    //   qdedup
81    //      Prints help information and exits.
82    //   qdedup ndups <sha512file>
83    //      Prints statistics about the number of duplicates in <sha512file>.
84    //   qdedup filterdups <sha512file>
85    //      Analyzes duplicates and prints the filenames of groups of duplicates.  The output is designed
86    //      for hand analysis so that insight can be gained into what duplicates exist and where they
87    //      are located.
88    //   qdedup dedup_preserve_inside <sha512file> <path>
89    //      For each group of duplicates that exists, preserves the duplicates that exist within path
90    //      and removes all others.  If no copies of the duplicate exist within path, no copies of the
91    //      duplicate will be removed.
92    //   qdedup dryrun_preserve_inside <sha512file> <path>
93    //      Exactly like "dedup_preserve_inside", except that no files will be deleted.  Text will be
94    //      output to explain what would be deleted by "dedup_preserve_inside".
95    //   qdedup dedup_nopath <sha512file>
96    //      For each group of duplicates that exists, preserves only the first (the one with the lowest
97    //      sort-order filename).
98    //   qdedup dryrun_nopath <sha512file>
99    //      Exactly like "dedup_nopath", except that no files will be deleted.  Text will be
100    //      output to explain what would be deleted by "dedup_nopath".
101    //   qdedup dedup_preserve_outside <sha512file> <path>
102    //      For each group of duplicates that exists, deletes duplicates only from within the specified
103    //      path.  If any duplicates do not have at least one copy within <path> no instances of the
104    //      duplicate are deleted.
105    //   qdedup dryrun_preserve_outside <sha512file> <path>
106    //      Exactly like "dedup_preserve_outside", except that no files will be deleted.  Text will be
107    //      output to explain what would be deleted by "dedup_preserve_outside".
108    //----------------------------------------------------------------------------------------------------
109    #include <math.h>
110    #include <stdio.h>
111    #include <stdlib.h>
112    #include <string.h>
113    #include <time.h>
114    #include <unistd.h>
115    //----------------------------------------------------------------------------------------------------
//Number of printable characters in a line.
#define LINELEN           (78)

//The maximum number of characters that may be in a line of the SHA512 input file.  This count
//includes the \0 terminator, so only this value minus 1 characters may be in a line.
#define MAXLINELEN      (2000)

//Number of seconds to pause between file unlinks (deletions).  This is designed to give the user
//time to abort the program if desired before catastrophic quantities of files are deleted.
#define UNLINKPAUSETIME  (0.1)
123    //----------------------------------------------------------------------------------------------------
//Record that pairs the character representation of an SHA512 hash with the filename that was
//specified alongside it.
typedef struct
{
        //512/4 = 128 hexadecimal characters for the hash, plus 1 character for the zero terminator.
        char hash[128 + 1];

        //Filename as specified in the file, allocated via malloc() family.
        char *fname;
} tFileHashRecord;
133    //----------------------------------------------------------------------------------------------------
134    //----------------------------------------------------------------------------------------------------
135    //-----  CHARACTER CLASSIFICATION FUNCTIONS  ---------------------------------------------------------
136    //----------------------------------------------------------------------------------------------------
137    //----------------------------------------------------------------------------------------------------
//Returns 1 if c may appear in the hash portion of a line, i.e. it is a decimal digit or one of the
//lower-case letters 'a' through 'f'.  Returns 0 for everything else, including upper-case hex digits.
int is_valid_hash_char(char c)
{
        if ((c >= '0') && (c <= '9'))
        {
                return(1);
        }

        if ((c >= 'a') && (c <= 'f'))
        {
                return(1);
        }

        return(0);
}
166    //----------------------------------------------------------------------------------------------------
//Returns 1 if c is one of the two characters that can make up a newline sequence (character
//code 13, carriage return, or character code 10, line feed), or 0 otherwise.
int is_newline_sequence_char(char c)
{
        return((c == 13) || (c == 10));
}
181    
182    //----------------------------------------------------------------------------------------------------
183    //----------------------------------------------------------------------------------------------------
184    //-----  FORMATTED OUTPUT FUNCTIONS  -----------------------------------------------------------------
185    //----------------------------------------------------------------------------------------------------
186    //----------------------------------------------------------------------------------------------------
//Writes the character c to the stream s exactly n times.  Nothing is written when n is 0.
//
void stream_rep_char(FILE* s, char c, unsigned n)
{
        unsigned remaining;

        for (remaining = n; remaining > 0; remaining--)
        {
                fprintf(s, "%c", c);
        }
}
196    //----------------------------------------------------------------------------------------------------
197    //Prints a horizontal line to a stream, including the newline.
198    //
199    void stream_hline(FILE* s)
200    {
201            stream_rep_char(s, '-', LINELEN);
202            fprintf(s, "\n");
203    }
204    //----------------------------------------------------------------------------------------------------
//Writes a horizontal rule of LINELEN '-' characters to stdout, followed by a newline.
//
void stdout_hline(void)
{
        //Same output as the stream version, aimed at stdout.
        stream_hline(stdout);
}
212    //----------------------------------------------------------------------------------------------------
213    //----------------------------------------------------------------------------------------------------
214    //-----  FATAL ERROR FUNCTIONS  ----------------------------------------------------------------------
215    //----------------------------------------------------------------------------------------------------
216    //----------------------------------------------------------------------------------------------------
//Reports an unrecoverable error and terminates the program.  Never returns.
//
//desc : human-readable description of the error.
//file : name of the source file reporting the error (callers pass __FILE__).
//line : source line reporting the error (callers pass __LINE__).
//
//The report is framed by horizontal rules and written to stdout; the process exit status is 1.
//
void fatal(const char* desc, const char* file, unsigned line)
{
        stdout_hline();
        fprintf(stdout, "Fatal error:  %s\n", desc);
        fprintf(stdout, "Source file:  %s\n", file);
        fprintf(stdout, "Line       :  %u\n", line);
        stdout_hline();

        exit(1);
}
228    //----------------------------------------------------------------------------------------------------
229    //----------------------------------------------------------------------------------------------------
230    //-----  MEMORY ALLOCATION WRAPPERS  -----------------------------------------------------------------
231    //----------------------------------------------------------------------------------------------------
232    //----------------------------------------------------------------------------------------------------
//malloc() wrapper that never returns NULL.
//
//nbytes : number of bytes to allocate.  A request for 0 bytes is treated as a fatal error.
//
//Returns a pointer to nbytes of zero-filled memory.  If the allocation cannot be satisfied, the
//program is terminated via fatal().  The caller owns the returned memory.
void* w_malloc(size_t nbytes)
{
        void* rv;

        if (!nbytes)
        {
                fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
        }

        rv = malloc(nbytes);

        if (!rv)
        {
                fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
        }

        //Zero out, just for consistency.
        memset(rv, 0, nbytes);

        //Hand the block to the caller.  Without this return the caller would receive an
        //indeterminate pointer.
        return(rv);
}
253    //----------------------------------------------------------------------------------------------------
//realloc() wrapper that never returns NULL.
//
//p : pointer to an existing allocation.  A NULL pointer is treated as a fatal error.
//n : new size in bytes.  A request for 0 bytes is treated as a fatal error.
//
//Returns the (possibly moved) allocation, now n bytes long.  If the reallocation cannot be
//satisfied, the program is terminated via fatal().  After a successful call the caller must use
//only the returned pointer, never p.
void* w_realloc(void* p, size_t n)
{
        void* rv;

        if (!n)
        {
                fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
        }

        if (!p)
        {
                fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
        }

        rv = realloc(p, n);

        if (!rv)
        {
                fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
        }

        //Hand the block to the caller.  Without this return the caller would receive an
        //indeterminate pointer.
        return(rv);
}
276    //----------------------------------------------------------------------------------------------------
277    //----------------------------------------------------------------------------------------------------
278    //-----  SLEEP FUNCTIONS  ----------------------------------------------------------------------------
279    //----------------------------------------------------------------------------------------------------
280    //----------------------------------------------------------------------------------------------------
//Suspends the program for the requested time.
//
//seconds : time to sleep, in seconds; fractional values are allowed.  A negative value or a value
//          greater than 3600 is treated as a fatal error.
//
//The result of nanosleep() is not examined.
void w_sleep(double seconds)
{
        struct timespec t;
        double whole_seconds;

        if (seconds < 0)
        {
                fatal("Sleep for negative time request.", __FILE__, __LINE__);
        }
        else if (seconds > 3600)
        {
                fatal("Sleep for too long request.", __FILE__, __LINE__);
        }

        //Split the request into whole seconds and the remaining fraction expressed in nanoseconds.
        whole_seconds = floor(seconds);
        t.tv_sec = whole_seconds;
        t.tv_nsec = (seconds - whole_seconds) * 1E9;

        nanosleep(&t, NULL);
}
300    //----------------------------------------------------------------------------------------------------
301    //----------------------------------------------------------------------------------------------------
302    //-----  SHA512 FIELD READ FUNCTIONS  ----------------------------------------------------------------
303    //----------------------------------------------------------------------------------------------------
304    //----------------------------------------------------------------------------------------------------
305    //These functions read in an individual field of a standard SHA512 file generated using application
306    //of the standard sha512sum program.
307    //
308    //*rcode = 1, success.
309    //         0, legal end of file, record assigned.
310    void get_sha512file_line(FILE* s, int* rcode, tFileHashRecord* hash_rec)
311    {
312            unsigned bidx;
313            unsigned nchars;
314            int ic;
315            int exitflag;
316            int eoffound;
317            int eolfound;
318            char c;
319            char buf[MAXLINELEN];
320    
321            //Zero out the buffer.  This handles string termination automatically.
322            memset(buf, 0, sizeof(buf));
323    
324            //Read characters into the buffer until either hit EOF, newline, or can't
325            //fill the buffer any longer.
326            eoffound = 0;
327            eolfound = 0;
328            exitflag = 0;
329            bidx = 0;
330            do
331            {
332                    ic = fgetc(s);
333                    c = ic;
334    
335                    if (ic == EOF)
336                    {
337                            eoffound = 1;
338                            eolfound = 0;
339                            nchars = bidx;
340                            exitflag = 1;
341                    }
342                    else if (is_newline_sequence_char(c))
343                    {
344                            eoffound = 0;
345                            eolfound = 1;
346                            nchars = bidx;
347                            exitflag = 1;
348                    }
349                    else if (bidx >= (MAXLINELEN - 1))
350                    {
351                            fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
352                    }
353                    else
354                    {
355                            buf[bidx] = c;
356                            bidx++;
357                            exitflag = 0;
358                    }
359            } while (!exitflag);
360    
361            //If we encountered a newline, inch past it.  We may encounter an EOF.
362            if (eolfound)
363            {
364                    exitflag = 0;
365                    do
366                    {
367                            ic = fgetc(s);
368                            c = ic;
369    
370                            if (ic == EOF)
371                            {
372                                    eoffound = 1;
373                                    eolfound = 0;
374                                    exitflag = 1;
375                            }
376                            else if (is_newline_sequence_char(c))
377                            {
378                                    exitflag = 0;
379                            }
380                            else
381                            {
382                                    //We hit the next line.  Put the character back.
383                                    eoffound = 0;
384                                    eolfound = 1;
385                                    ungetc(ic, s);
386                                    exitflag = 1;
387                            }
388                    } while (!exitflag);
389            }
390    
391            //For better or worse, we have a \0-terminated line in the buffer.
392            //
393            //Zero the caller's area.  This takes care of the hash terminator as well.
394            memset(hash_rec, 0, sizeof(*hash_rec));
395    
396            //Ensure that we have at least 128 characters, and they are all hex characters.
397            //Otherwise, we can't proceed.
398            if (nchars < 128)
399            {
400                    fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
401            }
402            else
403            {
404                    for (bidx = 0; bidx < 128; bidx++)
405                    {
406                            if (!is_valid_hash_char(buf[bidx]))
407                            {
408                                    fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
409                            }
410                    }
411            }
412    
413            //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
414            if (nchars < 130)
415            {
416                    fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
417            }
418            else if (buf[128] != ' ')
419            {
420                    fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
421            }
422            else if (buf[129] != '*')
423            {
424                    fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
425            }
426            //   else if (buf[129] != ' ')
427            //   {
428            //      //130th character is ' '.  Need to figure out why sometimes space and sometimes '*'.
429            //      fatal("130th hash line character must be \" \".", __FILE__, __LINE__);
430            //   }
431    
432               //There must be a 131'st character.  Beyond that, we can't qualify, because filenames may
433               //have odd characters and may be of any length.
434            if (nchars < 131)
435            {
436                    fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
437            }
438    
439            //Copy the hash to the caller's area.  The terminator has already been inserted.
440            memcpy(&(hash_rec->hash[0]), buf, 128);
441    
442            //Allocate space for the filename.
443            hash_rec->fname = w_malloc(strlen(buf + 130) + 1);
444    
445            //Make the copy.
446            strcpy(hash_rec->fname, buf + 130);
447    
448            if (eoffound)
449                    * rcode = 0;
450            else
451                    *rcode = 1;
452    }
453    //----------------------------------------------------------------------------------------------------
454    void parseinputfile(tFileHashRecord** parsed_recs, unsigned* count, char* fname)
455    {
456            FILE* s;
457            int rcode;
458    
459            //Try to open the file for reading.  Inability is a failure.
460            s = fopen(fname, "r");
461            if (!s)
462            {
463                    fatal("Hash file open failure.", __FILE__, __LINE__);
464            }
465    
466            //Start off with a count of 0 and a NULL pointer.
467            *count = 0;
468            *parsed_recs = NULL;
469    
470            do
471            {
472                    //For the first time, allocate space for one record.  Beyond that,
473                    //expand it.
474                    if (!*parsed_recs)
475                    {
476                            *parsed_recs = w_malloc(sizeof(tFileHashRecord));
477                    }
478                    else
479                    {
480                            *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
481                    }
482    
483                    //Parse and fill in the space.
484                    get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
485    
486                    //We now have one more.
487                    (*count)++;
488            } while (rcode == 1);
489    
490            //Try to close the file.  Inability is a failure.
491            if (fclose(s))
492            {
493                    fatal("Hash file close failure.", __FILE__, __LINE__);
494            }
495    }
496    //----------------------------------------------------------------------------------------------------
497    int sortcmpascendinghash(const void* p0_in, const void* p1_in)
498    {
499            const tFileHashRecord* p0, * p1;
500    
501            p0 = p0_in;
502            p1 = p1_in;
503    
504            return(strcmp(p0->hash, p1->hash));
505    }
506    
507    //----------------------------------------------------------------------------------------------------
508    void sortinternaldsbyhash(tFileHashRecord* parsed_recs, unsigned count)
509    {
510            qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
511    }
512    //----------------------------------------------------------------------------------------------------
513    int sortcmpascendingfname(const void* p0_in, const void* p1_in)
514    {
515            const tFileHashRecord* p0, * p1;
516    
517            p0 = p0_in;
518            p1 = p1_in;
519    
520            return(strcmp(p0->fname, p1->fname));
521    }
522    //----------------------------------------------------------------------------------------------------
523    //This sort has to be run after the hash sort.  Within groups of identical hashes, it sorts by
524    //ascending filename.
525    void sortinternalgroupfname(tFileHashRecord* parsed_recs, unsigned count)
526    {
527            unsigned ui;
528            unsigned i_group_min, i_group_max;
529    
530            if (!count)
531                    return;
532    
533            i_group_min = 0;
534            i_group_max = 0;
535    
536            do
537            {
538                    //Advance i_group_max to the end of the group of duplicates.
539                    while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
540                    {
541                            i_group_max++;
542                    }
543    
544                    if (i_group_min != i_group_max)
545                    {
546                            //Sort the internal group.
547                            qsort(parsed_recs + i_group_min,
548                                    i_group_max - i_group_min + 1,
549                                    sizeof(tFileHashRecord),
550                                    sortcmpascendingfname);
551                    }
552    
553                    //On to the next group.
554                    i_group_max++;
555                    i_group_min = i_group_max;
556    
557            } while (i_group_max < (count - 1));
558    }
559    //----------------------------------------------------------------------------------------------------
560    void printsinglerecord(tFileHashRecord* rec, unsigned elno)
561    {
562            printf("[%9u]\n", elno);
563            printf("Hash       : %s\n", rec->hash);
564            printf("Filename   : %s\n", rec->fname);
565            stdout_hline();
566    }
567    //----------------------------------------------------------------------------------------------------
568    void printinternalds(tFileHashRecord* parsed_recs, unsigned count)
569    {
570            unsigned i;
571    
572            for (i = 0; i < count; i++)
573            {
574                    printsinglerecord(parsed_recs + i, i);
575            }
576    }
577    //----------------------------------------------------------------------------------------------------
578    void gather_dup_stats(tFileHashRecord* parsed_recs, unsigned count, unsigned* out_num_dups, unsigned* out_cumulative_dups)
579    {
580            unsigned i_group_min, i_group_max;
581    
582            *out_num_dups = 0;
583            *out_cumulative_dups = 0;
584    
585            if (!count)
586                    return;
587    
588            i_group_min = 0;
589            i_group_max = 0;
590    
591            do
592            {
593                    //Advance i_group_max to the end of the group of duplicates.
594                    while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
595                    {
596                            i_group_max++;
597                    }
598    
599                    //Log the findings.
600                    if (i_group_min != i_group_max)
601                    {
602                            (*out_num_dups)++;
603                            (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
604                    }
605    
606                    //On to the next group.
607                    i_group_max++;
608                    i_group_min = i_group_max;
609    
610            } while (i_group_max < (count - 1));
611    }
612    //----------------------------------------------------------------------------------------------------
613    void option_dups(char* fname)
614    {
615            tFileHashRecord* parsed_recs;
616            unsigned count, num_dups, cumulative_dups;
617    
618            parseinputfile(&parsed_recs, &count, fname);
619            //printf("%u records parsed.\n", count);
620            sortinternaldsbyhash(parsed_recs, count);
621            sortinternalgroupfname(parsed_recs, count);
622            printinternalds(parsed_recs, count);
623            stdout_hline();
624            gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
625            printf("Number of duplicated files  : %u\n", num_dups);
626            if (num_dups)
627            {
628                    printf("Average number of duplicates: %.2f\n", (double)cumulative_dups / (double)num_dups);
629            }
630    }
631    //----------------------------------------------------------------------------------------------------
632    void option_filterdups(char* fname)
633    {
634            tFileHashRecord* parsed_recs;
635            unsigned dupgroup;
636            unsigned count;
637            unsigned ui;
638            unsigned i_group_min, i_group_max;
639    
640            parseinputfile(&parsed_recs, &count, fname);
641            //printf("%u records parsed.\n", count);
642            sortinternaldsbyhash(parsed_recs, count);
643            sortinternalgroupfname(parsed_recs, count);
644    
645            if (!count)
646                    return;
647    
648            dupgroup = 0;
649            i_group_min = 0;
650            i_group_max = 0;
651    
652            do
653            {
654                    //Advance i_group_max to the end of the group of duplicates.
655                    while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
656                    {
657                            i_group_max++;
658                    }
659    
660                    //Print the findings.
661                    if (i_group_min != i_group_max)
662                    {
663                            printf("Duplicate group %u:\n", dupgroup);
664                            for (ui = i_group_min; ui <= i_group_max; ui++)
665                            {
666                                    printf("%s\n", parsed_recs[ui].fname);
667                            }
668    
669                            dupgroup++;
670    
671                            stdout_hline();
672                    }
673    
674                    //On to the next group.
675                    i_group_max++;
676                    i_group_min = i_group_max;
677    
678            } while (i_group_max < (count - 1));
679    }
680    //----------------------------------------------------------------------------------------------------
//Returns 1 if the filename is within the specified path, or 0 otherwise.
//
//fname : the filename to test.  A zero-length filename is a fatal error.
//path  : the path to test against.  It must be non-empty and must end with a forward slash;
//        anything else is a fatal error.
//
//The filename is within the path when it is longer than the path and begins with the path,
//compared character for character.
int is_path_member(const char* fname, const char* path)
{
        size_t fname_len;
        size_t path_len;

        fname_len = strlen(fname);
        if (fname_len == 0)
        {
                fatal("Zero-length filename.", __FILE__, __LINE__);
        }

        path_len = strlen(path);
        if (path_len == 0)
        {
                fatal("Zero-length path.", __FILE__, __LINE__);
        }

        if (path[path_len - 1] != '/')
        {
                fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
        }

        if (fname_len <= path_len)
        {
                //Can't be in the path because filename is not longer than path name.
                return 0;
        }

        return (memcmp(fname, path, path_len) == 0) ? 1 : 0;
}
710    //----------------------------------------------------------------------------------------------------
711    void option_dedup(char* fname, char* path, int may_delete, double pause_time)
712    {
713            tFileHashRecord* parsed_recs;
714            unsigned dupgroup;
715            unsigned count;
716            unsigned ui;
717            unsigned within_path;
718            unsigned i_group_min, i_group_max;
719    
720            parseinputfile(&parsed_recs, &count, fname);
721            //printf("%u records parsed.\n", count);
722            sortinternaldsbyhash(parsed_recs, count);
723            sortinternalgroupfname(parsed_recs, count);
724    
725            if (!count)
726                    return;
727    
728            dupgroup = 0;
729            i_group_min = 0;
730            i_group_max = 0;
731    
732            do
733            {
734                    //Advance i_group_max to the end of the group of duplicates.
735                    while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
736                    {
737                            i_group_max++;
738                    }
739    
740                    //If this is a group of duplicates.
741                    if (i_group_min != i_group_max)
742                    {
743                            //Print the findings.
744                            printf("Duplicate group %u:\n", dupgroup);
745                            for (ui = i_group_min; ui <= i_group_max; ui++)
746                            {
747                                    printf("%s\n", parsed_recs[ui].fname);
748                            }
749    
750                            dupgroup++;
751    
752                            stdout_hline();
753    
754                            //Count how many of the group of duplicates are within the supplied path.
755                            within_path = 0;
756                            for (ui = i_group_min; ui <= i_group_max; ui++)
757                            {
758                                    if (is_path_member(parsed_recs[ui].fname, path))
759                                    {
760                                            within_path++;
761                                    }
762                            }
763    
764                            //We have to take different actions based on whether we do or don't have any within path.
765                            //If we don't have any, we may delete nothing.
766                            if (!within_path)
767                            {
768                                    printf("None of these duplicates in path--taking no action.\n");
769                                    //stdout_hline();
770                            }
771                            else
772                            {
773                                    for (ui = i_group_min; ui <= i_group_max; ui++)
774                                    {
775                                            if (is_path_member(parsed_recs[ui].fname, path))
776                                            {
777                                                    printf("Not deleting: %s\n", parsed_recs[ui].fname);
778                                            }
779                                            else
780                                            {
781                                                    printf("Deleting    : %s\n", parsed_recs[ui].fname);
782                                                    if (may_delete)
783                                                    {
784                                                            if (!unlink(parsed_recs[ui].fname))
785                                                            {
786                                                                    printf("   File deleted (unlinked) successfully.\n");
787                                                            }
788                                                            else
789                                                            {
790                                                                    printf("   Failure attempting to delete (unlink) file.\n");
791                                                            }
792                                                    }
793                                                    else
794                                                    {
795                                                            printf("   Dry run only.\n");
796                                                    }
797                                            }
798    
799                                            //w_sleep(pause_time);
800                                    }
801                            }
802    
803                            stdout_hline();
804                    }
805    
806                    //On to the next group.
807                    i_group_max++;
808                    i_group_min = i_group_max;
809    
810            } while (i_group_max < (count - 1));
811    }
812    //----------------------------------------------------------------------------------------------------
813    int main(int argc, char* argv[])
814    {
815            stdout_hline();
816            printf("Execution begins.\n");
817            stdout_hline();
818    
819            if (argc == 1)
820            {
821            }
822            else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
823            {
824                    option_dups(argv[2]);
825            }
826            else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
827            {
828                    option_filterdups(argv[2]);
829            }
830            else if ((argc == 3) && (strcmp(argv[1], "dedup_nopath") == 0))
831            {
832                    //option_filterdups(argv[2]);
833            }
834            else if ((argc == 3) && (strcmp(argv[1], "dryrun_nopath") == 0))
835            {
836                    //option_filterdups(argv[2]);
837            }
838            else if ((argc == 4) && (strcmp(argv[1], "dedup_preserve_inside") == 0))
839            {
840                    option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
841            }
842            else if ((argc == 4) && (strcmp(argv[1], "dryrun_preserve_inside") == 0))
843            {
844                    option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME / 10.0);
845            }
846            else
847            {
848                    printf("Unrecognized parameter form.  Try \"dedup\".\n");
849            }
850    
851            //w_sleep(-3 /* UNLINKPAUSETIME*/ );
852    
853            //stdout_hline();
854            printf("Execution ends.\n");
855            stdout_hline();
856    
857            return 0;
858    }
859    //----------------------------------------------------------------------------------------------------
860    #endif

Legend:
Removed from v.309  
changed lines
  Added in v.310

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25