/[dtapublic]/projs/trunk/projs/20161007_dedup/qdedup.c
ViewVC logotype

Diff of /projs/trunk/projs/20161007_dedup/qdedup.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

swprojs/trunk/projs/20161007_dedup/dedup.c revision 12 by dashley, Sat Oct 8 00:50:01 2016 UTC projs/trunk/projs/20161007_dedup/qdedup.c revision 74 by dashley, Sat Nov 5 16:51:05 2016 UTC
# Line 1  Line 1 
1  //----------------------------------------------------------------------------------------------------  //----------------------------------------------------------------------------------------------------
2  //qdedup.c  //qdedup.c
3  //----------------------------------------------------------------------------------------------------  //----------------------------------------------------------------------------------------------------
4  //Quick and dirty program to eliminate duplicates from a file tree.  A file containing the SHA512  //Quick and dirty program to eliminate duplicates from a file tree.  A file containing the SHA512
5  //hashes of all the files to be considered must already exist.  The program will eliminate duplicates  //hashes of all the files to be considered must already exist, and must be regenerated each time the
6  //outside a single specified directory.  //underlying files are deleted/added/modified, which means the file must be regenerated after each run
7  //----------------------------------------------------------------------------------------------------  //of qdedup.  (WARNING:  IF YOU DO NOT REGENERATE THE FILE AFTER EACH RUN OF qdedup, YOU WILL
8  //Provided under the GNU GENERAL PUBLIC LICENSE, VERSION 3, reproduced immediately below.  //PROBABLY DESTROY DATA.  THE MECHANISM WOULD BE THAT THE SHA512 MANIFEST IMPLIES THAT DUPLICATES
9  //----------------------------------------------------------------------------------------------------  //EXIST WHEN THEY NO LONGER DO, SO qdedup WILL ERRONEOUSLY DELETE THE LAST COPIES OF FILES.)  The
10  //                    GNU GENERAL PUBLIC LICENSE  //program will eliminate duplicates within a single specified directory or outside a single specified
11  //                       Version 3, 29 June 2007  //directory.
12  //  //
13  // Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>  //This program will compile and run only on *nix systems and under Cygwin on Windows systems.
14  // Everyone is permitted to copy and distribute verbatim copies  //----------------------------------------------------------------------------------------------------
15  // of this license document, but changing it is not allowed.  //Copyright David T. Ashley (dashley@gmail.com), 2016.
16  //  //----------------------------------------------------------------------------------------------------
17  //                            Preamble  //Provided under the MIT LICENSE, reproduced immediately below.
18  //  //----------------------------------------------------------------------------------------------------
19  //  The GNU General Public License is a free, copyleft license for  //Permission is hereby granted, free of charge, to any person obtaining a copy of
20  //software and other kinds of works.  //this software and associated documentation files (the "Software"), to deal in the
21  //  //Software without restriction, including without limitation the rights to use,
22  //  The licenses for most software and other practical works are designed  //copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
23  //to take away your freedom to share and change the works.  By contrast,  //Software, and to permit persons to whom the Software is furnished to do so,
24  //the GNU General Public License is intended to guarantee your freedom to  //subject to the following conditions:
25  //share and change all versions of a program--to make sure it remains free  //
26  //software for all its users.  We, the Free Software Foundation, use the  //The above copyright notice and this permission notice shall be included in all
27  //GNU General Public License for most of our software; it applies also to  //copies or substantial portions of the Software.
28  //any other work released this way by its authors.  You can apply it to  //
29  //your programs, too.  //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30  //  //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31  //  When we speak of free software, we are referring to freedom, not  //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
32  //price.  Our General Public Licenses are designed to make sure that you  //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33  //have the freedom to distribute copies of free software (and charge for  //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34  //them if you wish), that you receive source code or can get it if you  //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35  //want it, that you can change the software or use pieces of it in new  //SOFTWARE.
36  //free programs, and that you know you can do these things.  //----------------------------------------------------------------------------------------------------
37  //  //All paths in the SHA512 file must be absolute or must be relative to the current working directory
38  //  To protect your rights, we need to prevent others from denying you  //at the time this program is run.
39  //these rights or asking you to surrender the rights.  Therefore, you have  //
40  //certain responsibilities if you distribute copies of the software, or if  //The recommended method to generate the SHA512 file is using the "-exec" option of the "find"
41  //you modify it: responsibilities to respect the freedom of others.  //command, i.e.
42  //  //
43  //  For example, if you distribute copies of such a program, whether  //   find target_directory -type f -exec sha512sum {} \; >sha512sums.txt
44  //gratis or for a fee, you must pass on to the recipients the same  //
45  //freedoms that you received.  You must make sure that they, too, receive  //If any files are deleted by the program, a new SHA512 file must be generated before the program is
46  //or can get the source code.  And you must show them these terms so they  //run again to delete files.  The reason for this restriction is that the program will never knowingly
47  //know their rights.  //delete the last copy of a file.  If the SHA512 file contains the digests of files that no longer
48  //  //exist, the program may unknowingly delete the last copies of files (because it believes based on
49  //  Developers that use the GNU GPL protect your rights with two steps:  //the SHA512 file that other copies exist when in fact they do not).
50  //(1) assert copyright on the software, and (2) offer you this License  //
51  //giving you legal permission to copy, distribute and/or modify it.  //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
52  //  //
53  //  For the developers' and authors' protection, the GPL clearly explains  //This program is designed to compile and run under Cygwin or *nix only.
54  //that there is no warranty for this free software.  For both users' and  //
55  //authors' sake, the GPL requires that modified versions be marked as  //Usage:
56  //changed, so that their problems will not be attributed erroneously to  //   qdedup
57  //authors of previous versions.  //      Prints help information and exits.
58  //  //   qdedup ndups <sha512file>
59  //  Some devices are designed to deny users access to install or run  //      Prints statistics about the number of duplicates in <sha512file>.
60  //modified versions of the software inside them, although the manufacturer  //   qdedup filterdups <sha512file>
61  //can do so.  This is fundamentally incompatible with the aim of  //      Analyzes duplicates and prints the filenames of groups of duplicates.  The output is designed
62  //protecting users' freedom to change the software.  The systematic  //      for hand analysis so that insight can be gained into what duplicates exist and where they
63  //pattern of such abuse occurs in the area of products for individuals to  //      are located.
64  //use, which is precisely where it is most unacceptable.  Therefore, we  //   qdedup dedup_preserve_inside <sha512file> <path>
65  //have designed this version of the GPL to prohibit the practice for those  //      For each group of duplicates that exists, preserves the duplicates that exist within path
66  //products.  If such problems arise substantially in other domains, we  //      and removes all others.  If no copies of the duplicate exist within path, no copies of the
67  //stand ready to extend this provision to those domains in future versions  //      duplicate will be removed.
68  //of the GPL, as needed to protect the freedom of users.  //   qdedup dryrun_preserve_inside <sha512file> <path>
69  //  //      Exactly like "dedup_preserve_inside", except that no files will be deleted.  Text will be
70  //  Finally, every program is threatened constantly by software patents.  //      output to explain what would be deleted by "dedup_preserve_inside".
71  //States should not allow patents to restrict development and use of  //   qdedup dedup_nopath <sha512file>
72  //software on general-purpose computers, but in those that do, we wish to  //      For each group of duplicates that exists, preserves only the first (the one with lowest
73  //avoid the special danger that patents applied to a free program could  //      sort-order filename).
74  //make it effectively proprietary.  To prevent this, the GPL assures that  //   qdedup dryrun_nopath <sha512file>
75  //patents cannot be used to render the program non-free.  //      Exactly like "dedup_nopath", except that no files will be deleted.  Text will be
76  //  //      output to explain what would be deleted by "dedup_nopath".
77  //  The precise terms and conditions for copying, distribution and  //   qdedup dedup_preserve_outside <sha512file> <path>
78  //modification follow.  //      For each group of duplicates that exists, deletes duplicates only from within the specified
79  //  //      path.  If any duplicates do not have at least one copy within <path> no instances of the
80  //                       TERMS AND CONDITIONS  //      duplicate are deleted.
81  //  //   qdedup dryrun_preserve_outside <sha512file> <path>
82  //  0. Definitions.  //      Exactly like "dedup_preserve_outside", except that no files will be deleted.  Text will be
83  //  //      output to explain what would be deleted by "dedup_preserve_outside".
84  //  "This License" refers to version 3 of the GNU General Public License.  //----------------------------------------------------------------------------------------------------
85  //  #include <math.h>
86  //  "Copyright" also means copyright-like laws that apply to other kinds of  #include <stdio.h>
87  //works, such as semiconductor masks.  #include <stdlib.h>
88  //  #include <string.h>
89  //  "The Program" refers to any copyrightable work licensed under this  #include <time.h>
90  //License.  Each licensee is addressed as "you".  "Licensees" and  #include <unistd.h>
91  //"recipients" may be individuals or organizations.  //----------------------------------------------------------------------------------------------------
92  //  #define LINELEN           (78)  //Number of printable characters in a line.
93  //  To "modify" a work means to copy from or adapt all or part of the work  #define MAXLINELEN      (2000)  //The maximum number of characters that may be in a line of the
94  //in a fashion requiring copyright permission, other than the making of an                                  //SHA512 input file.  This count includes the \0 terminator, so only
95  //exact copy.  The resulting work is called a "modified version" of the                                  //this value minus 1 characters may be in a line.
96  //earlier work or a work "based on" the earlier work.  #define UNLINKPAUSETIME  (0.1)  //Number of seconds to pause between file unlinks (deletions).  This
97  //                                  //is designed to give the user time to abort the program if desired
98  //  A "covered work" means either the unmodified Program or a work based                                  //before catastrophic quantities of files are deleted.
99  //on the Program.  //----------------------------------------------------------------------------------------------------
100  //  //Data structure that holds the character representation of an SHA512 hash, plus the specified
101  //  To "propagate" a work means to do anything with it that, without  //filename.
102  //permission, would make you directly or secondarily liable for  typedef struct
103  //infringement under applicable copyright law, except executing it on a  {
104  //computer or modifying a private copy.  Propagation includes copying,     char hash[129];
105  //distribution (with or without modification), making available to the        //512/4 = 128 characters for the hash, plus 1 character for zero terminator.
106  //public, and in some countries other activities as well.     char *fname;
107  //        //Filename as specified in the file, allocated via malloc() family.
108  //  To "convey" a work means any kind of propagation that enables other  } tFileHashRecord;
109  //parties to make or receive copies.  Mere interaction with a user through  //----------------------------------------------------------------------------------------------------
110  //a computer network, with no transfer of a copy, is not conveying.  //----------------------------------------------------------------------------------------------------
111  //  //-----  CHARACTER CLASSIFICATION FUNCTIONS  ---------------------------------------------------------
112  //  An interactive user interface displays "Appropriate Legal Notices"  //----------------------------------------------------------------------------------------------------
113  //to the extent that it includes a convenient and prominently visible  //----------------------------------------------------------------------------------------------------
114  //feature that (1) displays an appropriate copyright notice, and (2)  //TRUE if character is part of valid hash.
115  //tells the user that there is no warranty for the work (except to the  int is_valid_hash_char(char c)
116  //extent that warranties are provided), that licensees may convey the  {
117  //work under this License, and how to view a copy of this License.  If     switch(c)
118  //the interface presents a list of user commands or options, such as a     {
119  //menu, a prominent item in the list meets this criterion.        case '0':
120  //        case '1':
121  //  1. Source Code.        case '2':
122  //        case '3':
123  //  The "source code" for a work means the preferred form of the work        case '4':
124  //for making modifications to it.  "Object code" means any non-source        case '5':
125  //form of a work.        case '6':
126  //        case '7':
127  //  A "Standard Interface" means an interface that either is an official        case '8':
128  //standard defined by a recognized standards body, or, in the case of        case '9':
129  //interfaces specified for a particular programming language, one that        case 'a':
130  //is widely used among developers working in that language.        case 'b':
131  //        case 'c':
132  //  The "System Libraries" of an executable work include anything, other        case 'd':
133  //than the work as a whole, that (a) is included in the normal form of        case 'e':
134  //packaging a Major Component, but which is not part of that Major        case 'f':
135  //Component, and (b) serves only to enable use of the work with that           return(1);
136  //Major Component, or to implement a Standard Interface for which an           break;
137  //implementation is available to the public in source code form.  A        default:
138  //"Major Component", in this context, means a major essential component           return(0);
139  //(kernel, window system, and so on) of the specific operating system           break;
140  //(if any) on which the executable work runs, or a compiler used to     }
141  //produce the work, or an object code interpreter used to run it.  }
142  //  //----------------------------------------------------------------------------------------------------
143  //  The "Corresponding Source" for a work in object code form means all  //TRUE if character is part of newline sequence
144  //the source code needed to generate, install, and (for an executable  int is_newline_sequence_char(char c)
145  //work) run the object code and to modify the work, including scripts to  {
146  //control those activities.  However, it does not include the work's     switch(c)
147  //System Libraries, or general-purpose tools or generally available free     {
148  //programs which are used unmodified in performing those activities but        case 13:
149  //which are not part of the work.  For example, Corresponding Source        case 10:
150  //includes interface definition files associated with source files for           return(1);
151  //the work, and the source code for shared libraries and dynamically           break;
152  //linked subprograms that the work is specifically designed to require,        default:
153  //such as by intimate data communication or control flow between those           return(0);
154  //subprograms and other parts of the work.           break;
155  //     }
156  //  The Corresponding Source need not include anything that users  }
157  //can regenerate automatically from other parts of the Corresponding  
158  //Source.  //----------------------------------------------------------------------------------------------------
159  //  //----------------------------------------------------------------------------------------------------
160  //  The Corresponding Source for a work in source code form is that  //-----  FORMATTED OUTPUT FUNCTIONS  -----------------------------------------------------------------
161  //same work.  //----------------------------------------------------------------------------------------------------
162  //  //----------------------------------------------------------------------------------------------------
163  //  2. Basic Permissions.  //Repeats a character to a stream a specified number of times.
164  //  //
165  //  All rights granted under this License are granted for the term of  void stream_rep_char(FILE *s, char c, unsigned n)
166  //copyright on the Program, and are irrevocable provided the stated  {
167  //conditions are met.  This License explicitly affirms your unlimited     while(n--)
168  //permission to run the unmodified Program.  The output from running a     {
169  //covered work is covered by this License only if the output, given its        fprintf(s, "%c", c);
170  //content, constitutes a covered work.  This License acknowledges your     }
171  //rights of fair use or other equivalent, as provided by copyright law.  }
172  //  //----------------------------------------------------------------------------------------------------
173  //  You may make, run and propagate covered works that you do not  //Prints a horizontal line to a stream, including the newline.
174  //convey, without conditions so long as your license otherwise remains  //
175  //in force.  You may convey covered works to others for the sole purpose  void stream_hline(FILE *s)
176  //of having them make modifications exclusively for you, or provide you  {
177  //with facilities for running those works, provided that you comply with     stream_rep_char(s, '-', LINELEN);
178  //the terms of this License in conveying all material for which you do     fprintf(s, "\n");
179  //not control copyright.  Those thus making or running the covered works  }
180  //for you must do so exclusively on your behalf, under your direction  //----------------------------------------------------------------------------------------------------
181  //and control, on terms that prohibit them from making any copies of  //Prints a horizontal line to stdout, including the newline.
182  //your copyrighted material outside their relationship with you.  //
183  //  void stdout_hline(void)
184  //  Conveying under any other circumstances is permitted solely under  {
185  //the conditions stated below.  Sublicensing is not allowed; section 10     stream_rep_char(stdout, '-', LINELEN);
186  //makes it unnecessary.     fprintf(stdout, "\n");
187  //  }
188  //  3. Protecting Users' Legal Rights From Anti-Circumvention Law.  //----------------------------------------------------------------------------------------------------
189  //  //----------------------------------------------------------------------------------------------------
190  //  No covered work shall be deemed part of an effective technological  //-----  FATAL ERROR FUNCTIONS  ----------------------------------------------------------------------
191  //measure under any applicable law fulfilling obligations under article  //----------------------------------------------------------------------------------------------------
192  //11 of the WIPO copyright treaty adopted on 20 December 1996, or  //----------------------------------------------------------------------------------------------------
193  //similar laws prohibiting or restricting circumvention of such  //Errors out fatally.
194  //measures.  //
195  //  void fatal(const char *desc, const char *file, unsigned line)
196  //  When you convey a covered work, you waive any legal power to forbid  {
197  //circumvention of technological measures to the extent such circumvention     stdout_hline();
198  //is effected by exercising rights under this License with respect to     printf("Fatal error:  %s\n", desc);
199  //the covered work, and you disclaim any intention to limit operation or     printf("Source file:  %s\n", file);
200  //modification of the work as a means of enforcing, against the work's     printf("Line       :  %u\n", line);
201  //users, your or third parties' legal rights to forbid circumvention of     stdout_hline();
202  //technological measures.     exit(1);
203  //  }
204  //  4. Conveying Verbatim Copies.  //----------------------------------------------------------------------------------------------------
205  //  //----------------------------------------------------------------------------------------------------
206  //  You may convey verbatim copies of the Program's source code as you  //-----  MEMORY ALLOCATION WRAPPERS  -----------------------------------------------------------------
207  //receive it, in any medium, provided that you conspicuously and  //----------------------------------------------------------------------------------------------------
208  //appropriately publish on each copy an appropriate copyright notice;  //----------------------------------------------------------------------------------------------------
209  //keep intact all notices stating that this License and any  //malloc() wrapper.
210  //non-permissive terms added in accord with section 7 apply to the code;  void *w_malloc(size_t nbytes)
211  //keep intact all notices of the absence of any warranty; and give all  {
212  //recipients a copy of this License along with the Program.     void *rv;
213  //  
214  //  You may charge any price or no price for each copy that you convey,     if (!nbytes)
215  //and you may offer support or warranty protection for a fee.     {
216  //        fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
217  //  5. Conveying Modified Source Versions.     }
218  //  
219  //  You may convey a work based on the Program, or the modifications to     rv = malloc(nbytes);
220  //produce it from the Program, in the form of source code under the  
221  //terms of section 4, provided that you also meet all of these conditions:     if (!rv)
222  //     {
223  //    a) The work must carry prominent notices stating that you modified        fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
224  //    it, and giving a relevant date.     }
225  //  
226  //    b) The work must carry prominent notices stating that it is     //Zero out, just for consistency.
227  //    released under this License and any conditions added under section     memset(rv, 0, nbytes);
228  //    7.  This requirement modifies the requirement in section 4 to  }
229  //    "keep intact all notices".  //----------------------------------------------------------------------------------------------------
230  //  //realloc() wrapper.
231  //    c) You must license the entire work, as a whole, under this  void *w_realloc(void *p, size_t n)
232  //    License to anyone who comes into possession of a copy.  This  {
233  //    License will therefore apply, along with any applicable section 7     void *rv;
234  //    additional terms, to the whole of the work, and all its parts,  
235  //    regardless of how they are packaged.  This License gives no     if (!n)
236  //    permission to license the work in any other way, but it does not     {
237  //    invalidate such permission if you have separately received it.        fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
238  //     }
239  //    d) If the work has interactive user interfaces, each must display  
240  //    Appropriate Legal Notices; however, if the Program has interactive     if (!p)
241  //    interfaces that do not display Appropriate Legal Notices, your     {
242  //    work need not make them do so.        fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
243  //     }
244  //  A compilation of a covered work with other separate and independent  
245  //works, which are not by their nature extensions of the covered work,     rv = realloc(p, n);
246  //and which are not combined with it such as to form a larger program,  
247  //in or on a volume of a storage or distribution medium, is called an     if (!rv)
248  //"aggregate" if the compilation and its resulting copyright are not     {
249  //used to limit the access or legal rights of the compilation's users        fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
250  //beyond what the individual works permit.  Inclusion of a covered work     }
251  //in an aggregate does not cause this License to apply to the other  }
252  //parts of the aggregate.  //----------------------------------------------------------------------------------------------------
253  //  //----------------------------------------------------------------------------------------------------
254  //  6. Conveying Non-Source Forms.  //-----  SLEEP FUNCTIONS  ----------------------------------------------------------------------------
255  //  //----------------------------------------------------------------------------------------------------
256  //  You may convey a covered work in object code form under the terms  //----------------------------------------------------------------------------------------------------
257  //of sections 4 and 5, provided that you also convey the  //Sleep for a time, in seconds.
258  //machine-readable Corresponding Source under the terms of this License,  void w_sleep(double seconds)
259  //in one of these ways:  {
260  //     struct timespec t;
261  //    a) Convey the object code in, or embodied in, a physical product  
262  //    (including a physical distribution medium), accompanied by the     if (seconds < 0)
263  //    Corresponding Source fixed on a durable physical medium     {
264  //    customarily used for software interchange.        fatal("Sleep for negative time request.", __FILE__, __LINE__);
265  //     }
266  //    b) Convey the object code in, or embodied in, a physical product     else if (seconds > 3600)
267  //    (including a physical distribution medium), accompanied by a     {
268  //    written offer, valid for at least three years and valid for as        fatal("Sleep for too long request.", __FILE__, __LINE__);
269  //    long as you offer spare parts or customer support for that product     }
270  //    model, to give anyone who possesses the object code either (1) a  
271  //    copy of the Corresponding Source for all the software in the     t.tv_sec  = floor(seconds);
272  //    product that is covered by this License, on a durable physical     t.tv_nsec = (seconds - floor(seconds)) * 1E9;
273  //    medium customarily used for software interchange, for a price no  
274  //    more than your reasonable cost of physically performing this     nanosleep(&t, NULL);
275  //    conveying of source, or (2) access to copy the  }
276  //    Corresponding Source from a network server at no charge.  //----------------------------------------------------------------------------------------------------
277  //  //----------------------------------------------------------------------------------------------------
278  //    c) Convey individual copies of the object code with a copy of the  //-----  SHA512 FIELD READ FUNCTIONS  ----------------------------------------------------------------
279  //    written offer to provide the Corresponding Source.  This  //----------------------------------------------------------------------------------------------------
280  //    alternative is allowed only occasionally and noncommercially, and  //----------------------------------------------------------------------------------------------------
281  //    only if you received the object code with such an offer, in accord  //These functions read in an individual field of a standard SHA512 file generated using application
282  //    with subsection 6b.  //of the standard sha512sum program.
283  //  //
284  //    d) Convey the object code by offering access from a designated  //*rcode = 1, success.
285  //    place (gratis or for a charge), and offer equivalent access to the  //         0, legal end of file, record assigned.
286  //    Corresponding Source in the same way through the same place at no  void get_sha512file_line(FILE *s, int *rcode, tFileHashRecord *hash_rec)
287  //    further charge.  You need not require recipients to copy the  {
288  //    Corresponding Source along with the object code.  If the place to     unsigned bidx;
289  //    copy the object code is a network server, the Corresponding Source     unsigned nchars;
290  //    may be on a different server (operated by you or a third party)     int ic;
291  //    that supports equivalent copying facilities, provided you maintain     int exitflag;
292  //    clear directions next to the object code saying where to find the     int eoffound;
293  //    Corresponding Source.  Regardless of what server hosts the     int eolfound;
294  //    Corresponding Source, you remain obligated to ensure that it is     char c;
295  //    available for as long as needed to satisfy these requirements.     char buf[MAXLINELEN];
296  //  
297  //    e) Convey the object code using peer-to-peer transmission, provided     //Zero out the buffer.  This handles string termination automatically.
298  //    you inform other peers where the object code and Corresponding     memset(buf, 0, sizeof(buf));
299  //    Source of the work are being offered to the general public at no  
300  //    charge under subsection 6d.     //Read characters into the buffer until either hit EOF, newline, or can't
301  //     //fill the buffer any longer.
302  //  A separable portion of the object code, whose source code is excluded     eoffound  = 0;
303  //from the Corresponding Source as a System Library, need not be     eolfound  = 0;
304  //included in conveying the object code work.     exitflag  = 0;
305  //     bidx      = 0;
306  //  A "User Product" is either (1) a "consumer product", which means any     do
307  //tangible personal property which is normally used for personal, family,     {
308  //or household purposes, or (2) anything designed or sold for incorporation        ic = fgetc(s);
309  //into a dwelling.  In determining whether a product is a consumer product,        c  = ic;
310  //doubtful cases shall be resolved in favor of coverage.  For a particular  
311  //product received by a particular user, "normally used" refers to a        if (ic == EOF)
312  //typical or common use of that class of product, regardless of the status        {
313  //of the particular user or of the way in which the particular user           eoffound  = 1;
314  //actually uses, or expects or is expected to use, the product.  A product           eolfound  = 0;
315  //is a consumer product regardless of whether the product has substantial           nchars    = bidx;
316  //commercial, industrial or non-consumer uses, unless such uses represent           exitflag  = 1;
317  //the only significant mode of use of the product.        }
318  //        else if (is_newline_sequence_char(c))
319  //  "Installation Information" for a User Product means any methods,        {
320  //procedures, authorization keys, or other information required to install           eoffound  = 0;
321  //and execute modified versions of a covered work in that User Product from           eolfound  = 1;
322  //a modified version of its Corresponding Source.  The information must           nchars    = bidx;
323  //suffice to ensure that the continued functioning of the modified object           exitflag  = 1;
324  //code is in no case prevented or interfered with solely because        }
325  //modification has been made.        else if (bidx >= (MAXLINELEN - 1))
326  //        {
327  //  If you convey an object code work under this section in, or with, or           fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
328  //specifically for use in, a User Product, and the conveying occurs as        }
329  //part of a transaction in which the right of possession and use of the        else
330  //User Product is transferred to the recipient in perpetuity or for a        {
331  //fixed term (regardless of how the transaction is characterized), the           buf[bidx] = c;
332  //Corresponding Source conveyed under this section must be accompanied           bidx++;
333  //by the Installation Information.  But this requirement does not apply           exitflag  = 0;
334  //if neither you nor any third party retains the ability to install        }
335  //modified object code on the User Product (for example, the work has     } while(! exitflag);
336  //been installed in ROM).  
337  //     //If we encountered a newline, inch past it.  We may encounter an EOF.
338  //  The requirement to provide Installation Information does not include a     if (eolfound)
339  //requirement to continue to provide support service, warranty, or updates     {
340  //for a work that has been modified or installed by the recipient, or for        exitflag = 0;
341  //the User Product in which it has been modified or installed.  Access to a        do
342  //network may be denied when the modification itself materially and        {
343  //adversely affects the operation of the network or violates the rules and           ic = fgetc(s);
344  //protocols for communication across the network.           c  = ic;
345  //  
346  //  Corresponding Source conveyed, and Installation Information provided,           if (ic == EOF)
347  //in accord with this section must be in a format that is publicly           {
348  //documented (and with an implementation available to the public in              eoffound = 1;
349  //source code form), and must require no special password or key for              eolfound = 0;
350  //unpacking, reading or copying.              exitflag = 1;
351  //           }
352  //  7. Additional Terms.           else if (is_newline_sequence_char(c))
353  //           {
354  //  "Additional permissions" are terms that supplement the terms of this              exitflag = 0;
355  //License by making exceptions from one or more of its conditions.           }
356  //Additional permissions that are applicable to the entire Program shall           else
357  //be treated as though they were included in this License, to the extent           {
358  //that they are valid under applicable law.  If additional permissions              //We hit the next line.  Put the character back.
359  //apply only to part of the Program, that part may be used separately              eoffound = 0;
360  //under those permissions, but the entire Program remains governed by              eolfound = 1;
361  //this License without regard to the additional permissions.              ungetc(ic, s);
362  //              exitflag = 1;
363  //  When you convey a copy of a covered work, you may at your option           }
364  //remove any additional permissions from that copy, or from any part of        } while(! exitflag);
365  //it.  (Additional permissions may be written to require their own     }
366  //removal in certain cases when you modify the work.)  You may place  
367  //additional permissions on material, added by you to a covered work,     //For better or worse, we have a \0-terminated line in the buffer.
368  //for which you have or can give appropriate copyright permission.     //
369  //     //Zero the caller's area.  This takes care of the hash terminator as well.
370  //  Notwithstanding any other provision of this License, for material you     memset(hash_rec, 0, sizeof(*hash_rec));
371  //add to a covered work, you may (if authorized by the copyright holders of  
372  //that material) supplement the terms of this License with terms:     //Ensure that we have at least 128 characters, and they are all hex characters.
373  //     //Otherwise, we can't proceed.
374  //    a) Disclaiming warranty or limiting liability differently from the     if (nchars < 128)
375  //    terms of sections 15 and 16 of this License; or     {
376  //        fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
377  //    b) Requiring preservation of specified reasonable legal notices or     }
378  //    author attributions in that material or in the Appropriate Legal     else
379  //    Notices displayed by works containing it; or     {
380  //        for (bidx = 0; bidx < 128; bidx++)
381  //    c) Prohibiting misrepresentation of the origin of that material, or        {
382  //    requiring that modified versions of such material be marked in           if (! is_valid_hash_char(buf[bidx]))
383  //    reasonable ways as different from the original version; or           {
384  //              fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
385  //    d) Limiting the use for publicity purposes of names of licensors or           }
386  //    authors of the material; or        }
387  //     }
388  //    e) Declining to grant rights under trademark law for use of some  
389  //    trade names, trademarks, or service marks; or     //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
390  //     if (nchars < 130)
391  //    f) Requiring indemnification of licensors and authors of that     {
392  //    material by anyone who conveys the material (or modified versions of        fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
393  //    it) with contractual assumptions of liability to the recipient, for     }
394  //    any liability that these contractual assumptions directly impose on     else if (buf[128] != ' ')
395  //    those licensors and authors.     {
396  //        fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
397  //  All other non-permissive additional terms are considered "further     }
398  //restrictions" within the meaning of section 10.  If the Program as you     else if (buf[129] != '*')
399  //received it, or any part of it, contains a notice stating that it is     {
400  //governed by this License along with a term that is a further        fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
401  //restriction, you may remove that term.  If a license document contains     }
402  //a further restriction but permits relicensing or conveying under this  
403  //License, you may add to a covered work material governed by the terms     //There must be a 131'st character.  Beyond that, we can't qualify, because filenames may
404  //of that license document, provided that the further restriction does     //have odd characters and may be of any length.
405  //not survive such relicensing or conveying.     if (nchars < 131)
406  //     {
407  //  If you add terms to a covered work in accord with this section, you        fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
408  //must place, in the relevant source files, a statement of the     }
409  //additional terms that apply to those files, or a notice indicating  
410  //where to find the applicable terms.     //Copy the hash to the caller's area.  The terminator has already been inserted.
411  //     memcpy(&(hash_rec->hash[0]), buf, 128);
412  //  Additional terms, permissive or non-permissive, may be stated in the  
413  //form of a separately written license, or stated as exceptions;     //Allocate space for the filename.
414  //the above requirements apply either way.     hash_rec->fname = w_malloc(strlen(buf+130) + 1);
415  //  
416  //  8. Termination.     //Make the copy.
417  //     strcpy(hash_rec->fname, buf+130);
418  //  You may not propagate or modify a covered work except as expressly  
419  //provided under this License.  Any attempt otherwise to propagate or     if (eoffound)
420  //modify it is void, and will automatically terminate your rights under        *rcode = 0;
421  //this License (including any patent licenses granted under the third     else
422  //paragraph of section 11).        *rcode = 1;
423  //  }
424  //  However, if you cease all violation of this License, then your  //----------------------------------------------------------------------------------------------------
425  //license from a particular copyright holder is reinstated (a)  void parseinputfile(tFileHashRecord **parsed_recs, unsigned *count, char *fname)
426  //provisionally, unless and until the copyright holder explicitly and  {
427  //finally terminates your license, and (b) permanently, if the copyright     FILE *s;
428  //holder fails to notify you of the violation by some reasonable means     int rcode;
429  //prior to 60 days after the cessation.  
430  //     //Try to open the file for reading.  Inability is a failure.
431  //  Moreover, your license from a particular copyright holder is     s = fopen(fname, "r");
432  //reinstated permanently if the copyright holder notifies you of the     if (!s)
433  //violation by some reasonable means, this is the first time you have     {
434  //received notice of violation of this License (for any work) from that         fatal("Hash file open failure.", __FILE__, __LINE__);
435  //copyright holder, and you cure the violation prior to 30 days after     }
436  //your receipt of the notice.  
437  //     //Start off with a count of 0 and a NULL pointer.
438  //  Termination of your rights under this section does not terminate the     *count       = 0;
439  //licenses of parties who have received copies or rights from you under     *parsed_recs = NULL;
440  //this License.  If your rights have been terminated and not permanently  
441  //reinstated, you do not qualify to receive new licenses for the same     do
442  //material under section 10.     {
443  //        //For the first time, allocate space for one record.  Beyond that,
444  //  9. Acceptance Not Required for Having Copies.        //expand it.
445  //        if (! *parsed_recs)
446  //  You are not required to accept this License in order to receive or        {
447  //run a copy of the Program.  Ancillary propagation of a covered work           *parsed_recs = w_malloc(sizeof(tFileHashRecord));
448  //occurring solely as a consequence of using peer-to-peer transmission        }
449  //to receive a copy likewise does not require acceptance.  However,        else
450  //nothing other than this License grants you permission to propagate or        {
451  //modify any covered work.  These actions infringe copyright if you do           *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
452  //not accept this License.  Therefore, by modifying or propagating a        }
453  //covered work, you indicate your acceptance of this License to do so.  
454  //        //Parse and fill in the space.
455  //  10. Automatic Licensing of Downstream Recipients.        get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
456  //  
457  //  Each time you convey a covered work, the recipient automatically        //We now have one more.
458  //receives a license from the original licensors, to run, modify and        (*count)++;
459  //propagate that work, subject to this License.  You are not responsible     } while(rcode == 1);
460  //for enforcing compliance by third parties with this License.  
461  //     //Try to close the file.  Inability is a failure.
462  //  An "entity transaction" is a transaction transferring control of an     if (fclose(s))
463  //organization, or substantially all assets of one, or subdividing an     {
464  //organization, or merging organizations.  If propagation of a covered         fatal("Hash file close failure.", __FILE__, __LINE__);
465  //work results from an entity transaction, each party to that     }
466  //transaction who receives a copy of the work also receives whatever  }
467  //licenses to the work the party's predecessor in interest had or could  //----------------------------------------------------------------------------------------------------
468  //give under the previous paragraph, plus a right to possession of the  int sortcmpascendinghash(const void *p0_in, const void *p1_in)
469  //Corresponding Source of the work from the predecessor in interest, if  {
470  //the predecessor has it or can get it with reasonable efforts.     const tFileHashRecord *p0, *p1;
471  //  
472  //  You may not impose any further restrictions on the exercise of the     p0 = p0_in;
473  //rights granted or affirmed under this License.  For example, you may     p1 = p1_in;
474  //not impose a license fee, royalty, or other charge for exercise of  
475  //rights granted under this License, and you may not initiate litigation     return(strcmp(p0->hash, p1->hash));
476  //(including a cross-claim or counterclaim in a lawsuit) alleging that  }
477  //any patent claim is infringed by making, using, selling, offering for  
478  //sale, or importing the Program or any portion of it.  //----------------------------------------------------------------------------------------------------
479  //  void sortinternaldsbyhash(tFileHashRecord *parsed_recs, unsigned count)
480  //  11. Patents.  {
481  //     qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
482  //  A "contributor" is a copyright holder who authorizes use under this  }
483  //License of the Program or a work on which the Program is based.  The  //----------------------------------------------------------------------------------------------------
484  //work thus licensed is called the contributor's "contributor version".  int sortcmpascendingfname(const void *p0_in, const void *p1_in)
485  //  {
486  //  A contributor's "essential patent claims" are all patent claims     const tFileHashRecord *p0, *p1;
487  //owned or controlled by the contributor, whether already acquired or  
488  //hereafter acquired, that would be infringed by some manner, permitted     p0 = p0_in;
489  //by this License, of making, using, or selling its contributor version,     p1 = p1_in;
490  //but do not include claims that would be infringed only as a  
491  //consequence of further modification of the contributor version.  For     return(strcmp(p0->fname, p1->fname));
492  //purposes of this definition, "control" includes the right to grant  }
493  //patent sublicenses in a manner consistent with the requirements of  //----------------------------------------------------------------------------------------------------
494  //this License.  //This sort has to be run after the hash sort.  Within groups of identical hashes, it sorts by
495  //  //ascending filename.
496  //  Each contributor grants you a non-exclusive, worldwide, royalty-free  void sortinternalgroupfname(tFileHashRecord *parsed_recs, unsigned count)
497  //patent license under the contributor's essential patent claims, to  {
498  //make, use, sell, offer for sale, import and otherwise run, modify and      unsigned ui;
499  //propagate the contents of its contributor version.      unsigned i_group_min, i_group_max;
500  //  
501  //  In the following three paragraphs, a "patent license" is any express      if (! count)
502  //agreement or commitment, however denominated, not to enforce a patent         return;
503  //(such as an express permission to practice a patent or covenant not to  
504  //sue for patent infringement).  To "grant" such a patent license to a      i_group_min = 0;
505  //party means to make such an agreement or commitment not to enforce a      i_group_max = 0;
506  //patent against the party.  
507  //      do
508  //  If you convey a covered work, knowingly relying on a patent license,      {
509  //and the Corresponding Source of the work is not available for anyone         //Advance i_group_max to the end of the group of duplicates.
510  //to copy, free of charge and under the terms of this License, through a         while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
511  //publicly available network server or other readily accessible means,         {
512  //then you must either (1) cause the Corresponding Source to be so            i_group_max++;
513  //available, or (2) arrange to deprive yourself of the benefit of the         }
514  //patent license for this particular work, or (3) arrange, in a manner  
515  //consistent with the requirements of this License, to extend the patent         if (i_group_min != i_group_max)
516  //license to downstream recipients.  "Knowingly relying" means you have         {
517  //actual knowledge that, but for the patent license, your conveying the            //Sort the internal group.
518  //covered work in a country, or your recipient's use of the covered work            qsort(parsed_recs + i_group_min,
519  //in a country, would infringe one or more identifiable patents in that                  i_group_max - i_group_min + 1,
520  //country that you have reason to believe are valid.                  sizeof(tFileHashRecord),
521  //                  sortcmpascendingfname);
522  //  If, pursuant to or in connection with a single transaction or         }
523  //arrangement, you convey, or propagate by procuring conveyance of, a  
524  //covered work, and grant a patent license to some of the parties         //On to the next group.
525  //receiving the covered work authorizing them to use, propagate, modify         i_group_max++;
526  //or convey a specific copy of the covered work, then the patent license         i_group_min = i_group_max;
527  //you grant is automatically extended to all recipients of the covered  
528  //work and works based on it.      } while (i_group_max < (count - 1));
529  //  }
530  //  A patent license is "discriminatory" if it does not include within  //----------------------------------------------------------------------------------------------------
531  //the scope of its coverage, prohibits the exercise of, or is  void printsinglerecord(tFileHashRecord *rec, unsigned elno)
532  //conditioned on the non-exercise of one or more of the rights that are  {
533  //specifically granted under this License.  You may not convey a covered     printf("[%9u]\n", elno);
534  //work if you are a party to an arrangement with a third party that is     printf("Hash       : %s\n", rec->hash);
535  //in the business of distributing software, under which you make payment     printf("Filename   : %s\n", rec->fname);
536  //to the third party based on the extent of your activity of conveying     stdout_hline();
537  //the work, and under which the third party grants, to any of the  }
538  //parties who would receive the covered work from you, a discriminatory  //----------------------------------------------------------------------------------------------------
539  //patent license (a) in connection with copies of the covered work  void printinternalds(tFileHashRecord *parsed_recs, unsigned count)
540  //conveyed by you (or copies made from those copies), or (b) primarily  {
541  //for and in connection with specific products or compilations that      unsigned i;
542  //contain the covered work, unless you entered into that arrangement,  
543  //or that patent license was granted, prior to 28 March 2007.      for (i=0; i<count; i++)
544  //      {
545  //  Nothing in this License shall be construed as excluding or limiting         printsinglerecord(parsed_recs + i, i);
546  //any implied license or other defenses to infringement that may      }
547  //otherwise be available to you under applicable patent law.  }
548  //  //----------------------------------------------------------------------------------------------------
549  //  12. No Surrender of Others' Freedom.  void gather_dup_stats(tFileHashRecord *parsed_recs, unsigned count, unsigned *out_num_dups, unsigned *out_cumulative_dups)
550  //  {
551  //  If conditions are imposed on you (whether by court order, agreement or     unsigned i_group_min, i_group_max;
552  //otherwise) that contradict the conditions of this License, they do not  
553  //excuse you from the conditions of this License.  If you cannot convey a     *out_num_dups = 0;
554  //covered work so as to satisfy simultaneously your obligations under this     *out_cumulative_dups = 0;
555  //License and any other pertinent obligations, then as a consequence you may  
556  //not convey it at all.  For example, if you agree to terms that obligate you     if (! count)
557  //to collect a royalty for further conveying from those to whom you convey        return;
558  //the Program, the only way you could satisfy both those terms and this  
559  //License would be to refrain entirely from conveying the Program.     i_group_min = 0;
560  //     i_group_max = 0;
561  //  13. Use with the GNU Affero General Public License.  
562  //     do
563  //  Notwithstanding any other provision of this License, you have     {
564  //permission to link or combine any covered work with a work licensed        //Advance i_group_max to the end of the group of duplicates.
565  //under version 3 of the GNU Affero General Public License into a single        while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
566  //combined work, and to convey the resulting work.  The terms of this        {
567  //License will continue to apply to the part which is the covered work,           i_group_max++;
568  //but the special requirements of the GNU Affero General Public License,        }
569  //section 13, concerning interaction through a network will apply to the  
570  //combination as such.        //Log the findings.
571  //        if (i_group_min != i_group_max)
572  //  14. Revised Versions of this License.        {
573  //           (*out_num_dups)++;
574  //  The Free Software Foundation may publish revised and/or new versions of           (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
575  //the GNU General Public License from time to time.  Such new versions will        }
576  //be similar in spirit to the present version, but may differ in detail to  
577  //address new problems or concerns.        //On to the next group.
578  //        i_group_max++;
579  //  Each version is given a distinguishing version number.  If the        i_group_min = i_group_max;
580  //Program specifies that a certain numbered version of the GNU General  
581  //Public License "or any later version" applies to it, you have the     } while (i_group_max < (count - 1));
582  //option of following the terms and conditions either of that numbered  }
583  //version or of any later version published by the Free Software  //----------------------------------------------------------------------------------------------------
584  //Foundation.  If the Program does not specify a version number of the  void option_dups(char *fname)
585  //GNU General Public License, you may choose any version ever published  {
586  //by the Free Software Foundation.     tFileHashRecord *parsed_recs;
587  //     unsigned count, num_dups, cumulative_dups;
588  //  If the Program specifies that a proxy can decide which future  
589  //versions of the GNU General Public License can be used, that proxy's     parseinputfile(&parsed_recs, &count, fname);
590  //public statement of acceptance of a version permanently authorizes you     //printf("%u records parsed.\n", count);
591  //to choose that version for the Program.     sortinternaldsbyhash(parsed_recs, count);
592  //     sortinternalgroupfname(parsed_recs, count);
593  //  Later license versions may give you additional or different     printinternalds(parsed_recs, count);
594  //permissions.  However, no additional obligations are imposed on any     stdout_hline();
595  //author or copyright holder as a result of your choosing to follow a     gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
596  //later version.     printf("Number of duplicated files  : %u\n", num_dups);
597  //     if (num_dups)
598  //  15. Disclaimer of Warranty.     {
599  //        printf("Average number of duplicates: %.2f\n", (double)cumulative_dups/(double)num_dups);
600  //  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY     }
601  //APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT  }
602  //HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY  //----------------------------------------------------------------------------------------------------
603  //OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,  void option_filterdups(char *fname)
604  //THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR  {
605  //PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM     tFileHashRecord *parsed_recs;
606  //IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF     unsigned dupgroup;
607  //ALL NECESSARY SERVICING, REPAIR OR CORRECTION.     unsigned count;
608  //     unsigned ui;
609  //  16. Limitation of Liability.     unsigned i_group_min, i_group_max;
610  //  
611  //  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING     parseinputfile(&parsed_recs, &count, fname);
612  //WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS     //printf("%u records parsed.\n", count);
613  //THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY     sortinternaldsbyhash(parsed_recs, count);
614  //GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE     sortinternalgroupfname(parsed_recs, count);
615  //USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF  
616  //DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD     if (! count)
617  //PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),        return;
618  //EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF  
619  //SUCH DAMAGES.     dupgroup = 0;
620  //     i_group_min = 0;
621  //  17. Interpretation of Sections 15 and 16.     i_group_max = 0;
622  //  
623  //  If the disclaimer of warranty and limitation of liability provided     do
624  //above cannot be given local legal effect according to their terms,     {
625  //reviewing courts shall apply local law that most closely approximates        //Advance i_group_max to the end of the group of duplicates.
626  //an absolute waiver of all civil liability in connection with the        while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
627  //Program, unless a warranty or assumption of liability accompanies a        {
628  //copy of the Program in return for a fee.           i_group_max++;
629  //        }
630  //                     END OF TERMS AND CONDITIONS  
631  //        //Print the findings.
632  //            How to Apply These Terms to Your New Programs        if (i_group_min != i_group_max)
633  //        {
634  //  If you develop a new program, and you want it to be of the greatest            printf("Duplicate group %u:\n", dupgroup);
635  //possible use to the public, the best way to achieve this is to make it            for (ui = i_group_min; ui <= i_group_max; ui++)
636  //free software which everyone can redistribute and change under these terms.            {
637  //               printf("%s\n", parsed_recs[ui].fname);
638  //  To do so, attach the following notices to the program.  It is safest            }
639  //to attach them to the start of each source file to most effectively  
640  //state the exclusion of warranty; and each file should have at least            dupgroup++;
641  //the "copyright" line and a pointer to where the full notice is found.  
642  //            stdout_hline();
643  //    <one line to give the program's name and a brief idea of what it does.>        }
644  //    Copyright (C) <year>  <name of author>  
645  //        //On to the next group.
646  //    This program is free software: you can redistribute it and/or modify        i_group_max++;
647  //    it under the terms of the GNU General Public License as published by        i_group_min = i_group_max;
648  //    the Free Software Foundation, either version 3 of the License, or  
649  //    (at your option) any later version.     } while (i_group_max < (count - 1));
650  //  }
651  //    This program is distributed in the hope that it will be useful,  //----------------------------------------------------------------------------------------------------
652  //    but WITHOUT ANY WARRANTY; without even the implied warranty of  //Returns true if the filename is within the specified path, or false otherwise.
653  //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  int is_path_member(const char *fname, const char *path)
654  //    GNU General Public License for more details.  {
655  //     if (strlen(fname) == 0)
656  //    You should have received a copy of the GNU General Public License     {
657  //    along with this program.  If not, see <http://www.gnu.org/licenses/>.        fatal("Zero-length filename.", __FILE__, __LINE__);
658  //     }
659  //Also add information on how to contact you by electronic and paper mail.     else if (strlen(path) == 0)
660  //     {
661  //  If the program does terminal interaction, make it output a short        fatal("Zero-length path.", __FILE__, __LINE__);
662  //notice like this when it starts in an interactive mode:     }
663  //     else if (path[strlen(path) - 1] != '/')
664  //    <program>  Copyright (C) <year>  <name of author>     {
665  //    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.        fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
666  //    This is free software, and you are welcome to redistribute it     }
667  //    under certain conditions; type `show c' for details.     else if (strlen(fname) <= strlen(path))
668  //     {
669  //The hypothetical commands `show w' and `show c' should show the appropriate        //Can't be in the path because filename is not longer than path name.
670  //parts of the General Public License.  Of course, your program's commands        return 0;
671  //might be different; for a GUI interface, you would use an "about box".     }
672  //     else if (memcmp(fname, path, strlen(path)) == 0)
673  //  You should also get your employer (if you work as a programmer) or school,     {
674  //if any, to sign a "copyright disclaimer" for the program, if necessary.        return 1;
675  //For more information on this, and how to apply and follow the GNU GPL, see     }
676  //<http://www.gnu.org/licenses/>.     else
677  //     {  
678  //  The GNU General Public License does not permit incorporating your program        return 0;
679  //into proprietary programs.  If your program is a subroutine library, you     }
680  //may consider it more useful to permit linking proprietary applications with  }
681  //the library.  If this is what you want to do, use the GNU Lesser General  //----------------------------------------------------------------------------------------------------
682  //Public License instead of this License.  But first, please read  void option_dedup(char *fname, char *path, int may_delete, double pause_time)
683  //<http://www.gnu.org/philosophy/why-not-lgpl.html>.  {
684  //     tFileHashRecord *parsed_recs;
685  //----------------------------------------------------------------------------------------------------     unsigned dupgroup;
686  //     unsigned count;
687  //All paths in the SHA512 file must be absolute or must be relative to the current working directory     unsigned ui;
688  //at the time this program is run.     unsigned within_path;
689  //     unsigned i_group_min, i_group_max;
690  //If any files are deleted by the program, a new SHA512 file must be generated before the program is  
691  //run again to delete files.  The reason for this restriction is that the program will never knowingly     parseinputfile(&parsed_recs, &count, fname);
692  //delete the last copy of a file.  If the SHA512 file contains the digests of files that no longer     //printf("%u records parsed.\n", count);
693  //exist, the program may unknowingly delete the last copies of files (because it believes based on     sortinternaldsbyhash(parsed_recs, count);
694  //the SHA512 file that other copies exist when in fact they do not).     sortinternalgroupfname(parsed_recs, count);
695  //  
696  //A typical method of generating an SHA512 file is to use     if (! count)
697  //        return;
698  //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).  
699  //     dupgroup = 0;
700  //This program is designed to compile and run under Cygwin or *nix only.     i_group_min = 0;
701  //     i_group_max = 0;
702  //Usage:  
703  //   qdedup     do
704  //      Prints help information and exits.     {
705  //   qdedup ndups <sha512file>        //Advance i_group_max to the end of the group of duplicates.
706  //      Prints statistics about the number of duplicates in <sha512file>.        while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
707  //   qdedup filterdups <sha512file>        {
708  //      Analyzes duplicates and prints the filenames of groups of duplicates.  The output is designed           i_group_max++;
709  //      for hand analysis so that insight can be gained into what duplicates exist and where they        }
710  //      are located.  
711  //   qdedup dedup <sha512file> <path>        //If this is a group of duplicates.
712  //      For each group of duplicates that exists, preserves the duplicates that exist within path        if (i_group_min != i_group_max)
713  //      and removes all others.  If no copies of the duplicate exist within path, no copies of the        {
714  //      duplicate will be removed.            //Print the findings.
715  //   qdedup dedupnopath <sha512file>            printf("Duplicate group %u:\n", dupgroup);
716  //      For each group of duplicates that exists, preserves only the first (the one with lowest            for (ui = i_group_min; ui <= i_group_max; ui++)  
717  //      sort-order filename).            {
718  //   qdedup dryrun <sha512file> <path>               printf("%s\n", parsed_recs[ui].fname);
719  //      Prints output indicating which files the program would remove if run with the "dedup" option,            }
720  //      but does not remove any files.  
721  //   qdedup dryrunnopath <sha512file>            dupgroup++;
722  //      Prints output indicating which files the program would remove if run with the "dedupnopath"  
723  //      option, but does not remove any files.            stdout_hline();
724  //----------------------------------------------------------------------------------------------------  
725  #include <math.h>            //Count how many of the group of duplicates are within the supplied path.
726  #include <stdio.h>            within_path = 0;
727  #include <stdlib.h>            for (ui = i_group_min; ui <= i_group_max; ui++)
728  #include <string.h>            {
729  #include <time.h>               if (is_path_member(parsed_recs[ui].fname, path))
730  #include <unistd.h>               {
731  //----------------------------------------------------------------------------------------------------                  within_path++;
732  #define LINELEN           (78)  //Number of printable characters in a line.               }
733  #define MAXLINELEN      (2000)  //The maximum number of characters that may be in a line of the            }
734                                  //SHA512 input file.  This count includes the \0 terminator, so only  
735                                  //this value minus 1 characters may be in a line.            //We have to take different actions based on whether we do or don't have any within path.
736  #define UNLINKPAUSETIME  (0.1)  //Number of seconds to pause between file unlinks (deletions).  This            //If we don't have any, we may delete nothing.
737                                  //is designed to give the user time to abort the program if desired            if (! within_path)
738                                  //before catastrophic quantities of files are deleted.            {
739  //----------------------------------------------------------------------------------------------------               printf("None of these duplicates in path--taking no action.\n");
740  //Data structure that holds the character representation of an SHA512 hash, plus the specified               //stdout_hline();  
741  //filename.            }
742  typedef struct            else
743  {            {
744     char hash[129];               for (ui = i_group_min; ui <= i_group_max; ui++)
745        //512/4 = 128 characters for the hash, plus 1 character for zero terminator.               {
746     char *fname;                  if (is_path_member(parsed_recs[ui].fname, path))
747        //Filename as specified in the file, allocated via malloc() family.                  {
748  } tFileHashRecord;                     printf("Not deleting: %s\n", parsed_recs[ui].fname);
749  //----------------------------------------------------------------------------------------------------                  }
750  //----------------------------------------------------------------------------------------------------                  else
751  //-----  CHARACTER CLASSIFICATION FUNCTIONS  ---------------------------------------------------------                  {
752  //----------------------------------------------------------------------------------------------------                     printf("Deleting    : %s\n", parsed_recs[ui].fname);
753  //----------------------------------------------------------------------------------------------------                     if (may_delete)
754  //TRUE if character is part of valid hash.                     {
755  int is_valid_hash_char(char c)                        if (! unlink(parsed_recs[ui].fname))
756  {                        {
757     switch(c)                            printf("   File deleted (unlinked) successfully.\n");
758     {                        }
759        case '0':                        else
760        case '1':                        {
761        case '2':                            printf("   Failure attempting to delete (unlink) file.\n");
762        case '3':                        }
763        case '4':                     }
764        case '5':                     else
765        case '6':                     {
766        case '7':                        printf("   Dry run only.\n");
767        case '8':                     }
768        case '9':                  }
769        case 'a':  
770        case 'b':                  //w_sleep(pause_time);
771        case 'c':               }
772        case 'd':            }
773        case 'e':  
774        case 'f':           stdout_hline();
775           return(1);        }
776           break;  
777        default:        //On to the next group.
778           return(0);        i_group_max++;
779           break;        i_group_min = i_group_max;
780     }  
781  }     } while (i_group_max < (count - 1));
782  //----------------------------------------------------------------------------------------------------  }
783  //TRUE if character is part of newline sequence  //----------------------------------------------------------------------------------------------------
784  int is_newline_sequence_char(char c)  int main(int argc, char* argv[])
785  {  {
786     switch(c)     stdout_hline();
787     {     printf("Execution begins.\n");
788        case 13:     stdout_hline();
789        case 10:  
790           return(1);     if (argc == 1)
791           break;     {
792        default:     }
793           return(0);     else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
794           break;     {
795     }        option_dups(argv[2]);
796  }     }
797       else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
798  //----------------------------------------------------------------------------------------------------     {
799  //----------------------------------------------------------------------------------------------------        option_filterdups(argv[2]);
800  //-----  FORMATTED OUTPUT FUNCTIONS  -----------------------------------------------------------------     }
801  //----------------------------------------------------------------------------------------------------     else if ((argc == 3) && (strcmp(argv[1], "dedupnopath") == 0))
802  //----------------------------------------------------------------------------------------------------     {
803  //Repeats a character to a stream a specified number of times.        //option_filterdups(argv[2]);
804  //     }
805  void stream_rep_char(FILE *s, char c, unsigned n)     else if ((argc == 3) && (strcmp(argv[1], "dryrunnopath") == 0))
806  {     {
807     while(n--)        //option_filterdups(argv[2]);
808     {     }
809        fprintf(s, "%c", c);     else if ((argc == 4) && (strcmp(argv[1], "dedup") == 0))
810     }     {
811  }        option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
812  //----------------------------------------------------------------------------------------------------     }
813  //Prints a horizontal line to a stream, including the newline.     else if ((argc == 4) && (strcmp(argv[1], "dryrun") == 0))
814  //     {
815  void stream_hline(FILE *s)        option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME/10.0);
816  {     }
817     stream_rep_char(s, '-', LINELEN);     else
818     fprintf(s, "\n");     {
819  }        printf("Unrecognized parameter form.  Try \"dedup\".\n");
820  //----------------------------------------------------------------------------------------------------     }
821  //Prints a horizontal line to stdout, including the newline.  
822  //     //w_sleep(-3 /* UNLINKPAUSETIME*/ );
823  void stdout_hline(void)  
824  {     //stdout_hline();
825     stream_rep_char(stdout, '-', LINELEN);     printf("Execution ends.\n");
826     fprintf(stdout, "\n");     stdout_hline();
827  }  
828  //----------------------------------------------------------------------------------------------------     return 0;
829  //----------------------------------------------------------------------------------------------------  }
830  //-----  FATAL ERROR FUNCTIONS  ----------------------------------------------------------------------  //----------------------------------------------------------------------------------------------------
831  //----------------------------------------------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //Errors out fatally.  
 //  
 void fatal(const char *desc, const char *file, unsigned line)  
 {  
    stdout_hline();  
    printf("Fatal error:  %s\n", desc);  
    printf("Source file:  %s\n", file);  
    printf("Line       :  %u\n", line);  
    stdout_hline();  
    exit(1);  
 }  
 //----------------------------------------------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //-----  MEMORY ALLOCATION WRAPPERS  -----------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //malloc() wrapper.  
 void *w_malloc(size_t nbytes)  
 {  
    void *rv;  
   
    if (!nbytes)  
    {  
       fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);  
    }  
   
    rv = malloc(nbytes);  
   
    if (!rv)  
    {  
       fatal("Out of memory in malloc() request.", __FILE__, __LINE__);  
    }  
   
    //Zero out, just for consistency.  
    memset(rv, 0, nbytes);  
 }  
 //----------------------------------------------------------------------------------------------------  
 //realloc() wrapper.  
 void *w_realloc(void *p, size_t n)  
 {  
    void *rv;  
   
    if (!n)  
    {  
       fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);  
    }  
   
    if (!p)  
    {  
       fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);  
    }  
   
    rv = realloc(p, n);  
   
    if (!rv)  
    {  
       fatal("Out of memory in realloc() request.", __FILE__, __LINE__);  
    }  
 }  
 //----------------------------------------------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //-----  SLEEP FUNCTIONS  ----------------------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //Sleep for a time, in seconds.  
 void w_sleep(double seconds)  
 {  
    struct timespec t;  
   
    if (seconds < 0)  
    {  
       fatal("Sleep for negative time request.", __FILE__, __LINE__);  
    }  
    else if (seconds > 3600)  
    {  
       fatal("Sleep for too long request.", __FILE__, __LINE__);  
    }  
   
    t.tv_sec  = floor(seconds);  
    t.tv_nsec = (seconds - floor(seconds)) * 1E9;  
   
    nanosleep(&t, NULL);  
 }  
 //----------------------------------------------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //-----  SHA512 FIELD READ FUNCTIONS  ----------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //----------------------------------------------------------------------------------------------------  
 //Reads one line of an SHA512 manifest (of the kind produced by the standard sha512sum program)
 //from a stream and converts it into a hash record.
 //
 //s        : Stream to read from, positioned at the start of a line.
 //rcode    : Output.  Set to 1 if more data follows the line that was read, or to 0 if end of file
 //           was reached while reading the line or while skipping the newline sequence after it.
 //           In both cases a record has been assigned to *hash_rec.
 //hash_rec : Output.  Receives the 128-character hash (zero-terminated) and a copy of the filename
 //           obtained from w_malloc().  This function never releases that filename.
 //
 //The line layout enforced below is:  128 characters accepted by is_valid_hash_char(), then a space,
 //then an asterisk, then at least one filename character.  Any violation, as well as a line that
 //does not fit in MAXLINELEN - 1 characters, is reported through fatal().
 void get_sha512file_line(FILE *s, int *rcode, tFileHashRecord *hash_rec)
 {
    unsigned bidx;
    unsigned nchars;
    int ic;
    int exitflag;
    int eoffound;
    int eolfound;
    char c;
    char buf[MAXLINELEN];

    //Zero out the buffer.  This handles string termination automatically.
    memset(buf, 0, sizeof(buf));

    //Read characters into the buffer until we hit EOF, hit a newline character, or run out of
    //buffer space (which is fatal).  Both normal exits record the line length in nchars.
    eoffound  = 0;
    eolfound  = 0;
    exitflag  = 0;
    bidx      = 0;
    do
    {
       ic = fgetc(s);
       c  = ic;

       if (ic == EOF)
       {
          eoffound  = 1;
          eolfound  = 0;
          nchars    = bidx;
          exitflag  = 1;
       }
       else if (is_newline_sequence_char(c))
       {
          eoffound  = 0;
          eolfound  = 1;
          nchars    = bidx;
          exitflag  = 1;
       }
       else if (bidx >= (MAXLINELEN - 1))
       {
          //Storing another character would overwrite the final \0 of the zeroed buffer.
          fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
       }
       else
       {
          buf[bidx] = c;
          bidx++;
          exitflag  = 0;
       }
    } while(! exitflag);

    //If we encountered a newline, move past it.  Every consecutive newline-sequence character is
    //consumed, so multi-character line endings and blank lines are skipped here.  We may encounter
    //an EOF while doing so.
    if (eolfound)
    {
       exitflag = 0;
       do
       {
          ic = fgetc(s);
          c  = ic;

          if (ic == EOF)
          {
             eoffound = 1;
             eolfound = 0;
             exitflag = 1;
          }
          else if (is_newline_sequence_char(c))
          {
             exitflag = 0;
          }
          else
          {
             //We hit the next line.  Put the character back.
             eoffound = 0;
             eolfound = 1;
             ungetc(ic, s);
             exitflag = 1;
          }
       } while(! exitflag);
    }

    //At this point the buffer holds a \0-terminated line, whether or not it is well formed.
    //
    //Zero the caller's area.  This takes care of the hash terminator as well.
    memset(hash_rec, 0, sizeof(*hash_rec));

    //Ensure that we have at least 128 characters, and that they are all valid hash characters.
    //Otherwise, we can't proceed.
    if (nchars < 128)
    {
       fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
    }
    else
    {
       for (bidx = 0; bidx < 128; bidx++)
       {
          if (! is_valid_hash_char(buf[bidx]))
          {
             fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
          }
       }
    }

    //The 129th and 130th characters must be present and must be a space and an asterisk,
    //respectively.
    if (nchars < 130)
    {
       fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
    }
    else if (buf[128] != ' ')
    {
       fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
    }
    else if (buf[129] != '*')
    {
       fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
    }

    //There must be a 131st character (the start of the filename).  Beyond that, we can't qualify,
    //because filenames may have odd characters and may be of any length.
    if (nchars < 131)
    {
       fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
    }

    //Copy the hash to the caller's area.  The terminator has already been inserted.
    memcpy(&(hash_rec->hash[0]), buf, 128);

    //Allocate space for the filename, which starts at offset 130 (after the space and asterisk).
    hash_rec->fname = w_malloc(strlen(buf+130) + 1);

    //Make the copy.
    strcpy(hash_rec->fname, buf+130);

    //Tell the caller whether the stream has been exhausted.
    if (eoffound)
       *rcode = 0;
    else
       *rcode = 1;
 }
 //----------------------------------------------------------------------------------------------------  
 void parseinputfile(tFileHashRecord **parsed_recs, unsigned *count, char *fname)  
 {  
    FILE *s;  
    int rcode;  
   
    //Try to open the file for reading.  Inability is a failure.  
    s = fopen(fname, "r");  
    if (!s)  
    {  
        fatal("Hash file open failure.", __FILE__, __LINE__);  
    }  
   
    //Start off with a count of 0 and a NULL pointer.  
    *count       = 0;  
    *parsed_recs = NULL;  
   
    do  
    {  
       //For the first time, allocate space for one record.  Beyond that,  
       //expand it.  
       if (! *parsed_recs)  
       {  
          *parsed_recs = w_malloc(sizeof(tFileHashRecord));  
       }  
       else  
       {  
          *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));  
       }  
   
       //Parse and fill in the space.  
       get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));  
   
       //We now have one more.  
       (*count)++;  
    } while(rcode == 1);  
   
    //Try to close the file.  Inability is a failure.  
    if (fclose(s))  
    {  
        fatal("Hash file close failure.", __FILE__, __LINE__);  
    }  
 }  
 //----------------------------------------------------------------------------------------------------  
 int sortcmpascendinghash(const void *p0_in, const void *p1_in)  
 {  
    const tFileHashRecord *p0, *p1;  
   
    p0 = p0_in;  
    p1 = p1_in;  
   
    return(strcmp(p0->hash, p1->hash));  
 }  
   
 //----------------------------------------------------------------------------------------------------  
 void sortinternaldsbyhash(tFileHashRecord *parsed_recs, unsigned count)  
 {  
    qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);  
 }  
 //----------------------------------------------------------------------------------------------------  
 int sortcmpascendingfname(const void *p0_in, const void *p1_in)  
 {  
    const tFileHashRecord *p0, *p1;  
   
    p0 = p0_in;  
    p1 = p1_in;  
   
    return(strcmp(p0->fname, p1->fname));  
 }  
 //----------------------------------------------------------------------------------------------------  
 //This sort has to be run after the hash sort.  Within groups of identical hashes, it sorts by  
 //ascending filename.  
 void sortinternalgroupfname(tFileHashRecord *parsed_recs, unsigned count)  
 {  
     unsigned ui;  
     unsigned i_group_min, i_group_max;  
   
     if (! count)  
        return;  
   
     i_group_min = 0;  
     i_group_max = 0;  
   
     do  
     {  
        //Advance i_group_max to the end of the group of duplicates.  
        while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))  
        {  
           i_group_max++;  
        }  
   
        if (i_group_min != i_group_max)  
        {  
           //Sort the internal group.  
           qsort(parsed_recs + i_group_min,  
                 i_group_max - i_group_min + 1,  
                 sizeof(tFileHashRecord),  
                 sortcmpascendingfname);  
        }  
   
        //On to the next group.  
        i_group_max++;  
        i_group_min = i_group_max;  
   
     } while (i_group_max < (count - 1));  
 }  
 //----------------------------------------------------------------------------------------------------  
 void printsinglerecord(tFileHashRecord *rec, unsigned elno)  
 {  
    printf("[%9u]\n", elno);  
    printf("Hash       : %s\n", rec->hash);  
    printf("Filename   : %s\n", rec->fname);  
    stdout_hline();  
 }  
 //----------------------------------------------------------------------------------------------------  
 void printinternalds(tFileHashRecord *parsed_recs, unsigned count)  
 {  
     unsigned i;  
   
     for (i=0; i<count; i++)  
     {  
        printsinglerecord(parsed_recs + i, i);  
     }  
 }  
 //----------------------------------------------------------------------------------------------------  
 void gather_dup_stats(tFileHashRecord *parsed_recs, unsigned count, unsigned *out_num_dups, unsigned *out_cumulative_dups)  
 {  
    unsigned i_group_min, i_group_max;  
   
    *out_num_dups = 0;  
    *out_cumulative_dups = 0;  
   
    if (! count)  
       return;  
   
    i_group_min = 0;  
    i_group_max = 0;  
   
    do  
    {  
       //Advance i_group_max to the end of the group of duplicates.  
       while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))  
       {  
          i_group_max++;  
       }  
   
       //Log the findings.  
       if (i_group_min != i_group_max)  
       {  
          (*out_num_dups)++;  
          (*out_cumulative_dups) += (i_group_max - i_group_min + 1);  
       }  
   
       //On to the next group.  
       i_group_max++;  
       i_group_min = i_group_max;  
   
    } while (i_group_max < (count - 1));  
 }  
 //----------------------------------------------------------------------------------------------------  
 void option_dups(char *fname)  
 {  
    tFileHashRecord *parsed_recs;  
    unsigned count, num_dups, cumulative_dups;  
   
    parseinputfile(&parsed_recs, &count, fname);  
    //printf("%u records parsed.\n", count);  
    sortinternaldsbyhash(parsed_recs, count);  
    sortinternalgroupfname(parsed_recs, count);  
    printinternalds(parsed_recs, count);  
    stdout_hline();  
    gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);  
    printf("Number of duplicated files  : %u\n", num_dups);  
    if (num_dups)  
    {  
       printf("Average number of duplicates: %.2f\n", (double)cumulative_dups/(double)num_dups);  
    }  
 }  
 //----------------------------------------------------------------------------------------------------  
 void option_filterdups(char *fname)  
 {  
    tFileHashRecord *parsed_recs;  
    unsigned dupgroup;  
    unsigned count;  
    unsigned ui;  
    unsigned i_group_min, i_group_max;  
   
    parseinputfile(&parsed_recs, &count, fname);  
    //printf("%u records parsed.\n", count);  
    sortinternaldsbyhash(parsed_recs, count);  
    sortinternalgroupfname(parsed_recs, count);  
   
    if (! count)  
       return;  
   
    dupgroup = 0;  
    i_group_min = 0;  
    i_group_max = 0;  
   
    do  
    {  
       //Advance i_group_max to the end of the group of duplicates.  
       while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))  
       {  
          i_group_max++;  
       }  
   
       //Print the findings.  
       if (i_group_min != i_group_max)  
       {  
           printf("Duplicate group %u:\n", dupgroup);  
           for (ui = i_group_min; ui <= i_group_max; ui++)  
           {  
              printf("%s\n", parsed_recs[ui].fname);  
           }  
   
           dupgroup++;  
   
           stdout_hline();  
       }  
   
       //On to the next group.  
       i_group_max++;  
       i_group_min = i_group_max;  
   
    } while (i_group_max < (count - 1));  
 }  
 //----------------------------------------------------------------------------------------------------  
 //Returns true if the filename is within the specified path, or false otherwise.  
 int is_path_member(const char *fname, const char *path)  
 {  
    if (strlen(fname) == 0)  
    {  
       fatal("Zero-length filename.", __FILE__, __LINE__);  
    }  
    else if (strlen(path) == 0)  
    {  
       fatal("Zero-length path.", __FILE__, __LINE__);  
    }  
    else if (path[strlen(path) - 1] != '/')  
    {  
       fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);  
    }  
    else if (strlen(fname) <= strlen(path))  
    {  
       //Can't be in the path because filename is not longer than path name.  
       return 0;  
    }  
    else if (memcmp(fname, path, strlen(path)) == 0)  
    {  
       return 1;  
    }  
    else  
    {    
       return 0;  
    }  
 }  
 //----------------------------------------------------------------------------------------------------  
 void option_dedup(char *fname, char *path, int may_delete, double pause_time)  
 {  
    tFileHashRecord *parsed_recs;  
    unsigned dupgroup;  
    unsigned count;  
    unsigned ui;  
    unsigned within_path;  
    unsigned i_group_min, i_group_max;  
   
    parseinputfile(&parsed_recs, &count, fname);  
    //printf("%u records parsed.\n", count);  
    sortinternaldsbyhash(parsed_recs, count);  
    sortinternalgroupfname(parsed_recs, count);  
   
    if (! count)  
       return;  
   
    dupgroup = 0;  
    i_group_min = 0;  
    i_group_max = 0;  
   
    do  
    {  
       //Advance i_group_max to the end of the group of duplicates.  
       while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))  
       {  
          i_group_max++;  
       }  
   
       //If this is a group of duplicates.  
       if (i_group_min != i_group_max)  
       {  
           //Print the findings.  
           printf("Duplicate group %u:\n", dupgroup);  
           for (ui = i_group_min; ui <= i_group_max; ui++)  
           {  
              printf("%s\n", parsed_recs[ui].fname);  
           }  
   
           dupgroup++;  
   
           stdout_hline();  
   
           //Count how many of the group of duplicates are within the supplied path.  
           within_path = 0;  
           for (ui = i_group_min; ui <= i_group_max; ui++)  
           {  
              if (is_path_member(parsed_recs[ui].fname, path))  
              {  
                 within_path++;  
              }  
           }  
   
           //We have to take different actions based on whether we do or don't have any within path.  
           //If we don't have any, we may delete nothing.  
           if (! within_path)  
           {  
              printf("None of these duplicates in path--taking no action.\n");  
              //stdout_hline();  
           }  
           else  
           {  
              for (ui = i_group_min; ui <= i_group_max; ui++)  
              {  
                 if (is_path_member(parsed_recs[ui].fname, path))  
                 {  
                    printf("Not deleting: %s\n", parsed_recs[ui].fname);  
                 }  
                 else  
                 {  
                    printf("Deleting    : %s\n", parsed_recs[ui].fname);  
                    if (may_delete)  
                    {  
                       if (! unlink(parsed_recs[ui].fname))  
                       {  
                           printf("   File deleted (unlinked) successfully.\n");  
                       }  
                       else  
                       {  
                           printf("   Failure attempting to delete (unlink) file.\n");  
                       }  
                    }  
                    else  
                    {  
                       printf("   Dry run only.\n");  
                    }  
                 }  
   
                 //w_sleep(pause_time);  
              }  
           }  
   
          stdout_hline();  
       }  
   
       //On to the next group.  
       i_group_max++;  
       i_group_min = i_group_max;  
   
    } while (i_group_max < (count - 1));  
 }  
 //----------------------------------------------------------------------------------------------------  
 int main(int argc, char* argv[])  
 {  
    stdout_hline();  
    printf("Execution begins.\n");  
    stdout_hline();  
   
    if (argc == 1)  
    {  
    }  
    else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))  
    {  
       option_dups(argv[2]);  
    }  
    else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))  
    {  
       option_filterdups(argv[2]);  
    }  
    else if ((argc == 3) && (strcmp(argv[1], "dedupnopath") == 0))  
    {  
       //option_filterdups(argv[2]);  
    }  
    else if ((argc == 3) && (strcmp(argv[1], "dryrunnopath") == 0))  
    {  
       //option_filterdups(argv[2]);  
    }  
    else if ((argc == 4) && (strcmp(argv[1], "dedup") == 0))  
    {  
       option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);  
    }  
    else if ((argc == 4) && (strcmp(argv[1], "dryrun") == 0))  
    {  
       option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME/10.0);  
    }  
    else  
    {  
       printf("Unrecognized parameter form.  Try \"dedup\".\n");  
    }  
   
    //w_sleep(-3 /* UNLINKPAUSETIME*/ );  
   
    //stdout_hline();  
    printf("Execution ends.\n");  
    stdout_hline();  
   
    return 0;  
 }  
 //----------------------------------------------------------------------------------------------------  
   

Legend:
Removed from v.12  
changed lines
  Added in v.74

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25