/[dtapublic]/projs/trunk/projs/20161007_dedup/qdedup.c
ViewVC logotype

Annotation of /projs/trunk/projs/20161007_dedup/qdedup.c

Parent Directory | Revision Log


Revision 71 - (hide annotations) (download)
Sat Nov 5 11:07:06 2016 UTC (8 years ago) by dashley
Original Path: projs/trunk/projs/20161007_dedup/dedup.c
File MIME type: text/plain
File size: 62130 byte(s)
Set EOL properties appropriately to facilitate simultaneous Linux and Windows development.
1 dashley 71 //----------------------------------------------------------------------------------------------------
2     //qdedup.c
3     //----------------------------------------------------------------------------------------------------
4     //Quick and dirty program to eliminate duplicates from a file tree. A file containing the SHA512
5     //hashes of all the files to be considered must already exist. The program will eliminate duplicates
6     //outside a single specified directory.
7     //----------------------------------------------------------------------------------------------------
8     //Provided under the GNU GENERAL PUBLIC LICENSE, VERSION 3, reproduced immediately below.
9     //----------------------------------------------------------------------------------------------------
10     // GNU GENERAL PUBLIC LICENSE
11     // Version 3, 29 June 2007
12     //
13     // Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
14     // Everyone is permitted to copy and distribute verbatim copies
15     // of this license document, but changing it is not allowed.
16     //
17     // Preamble
18     //
19     // The GNU General Public License is a free, copyleft license for
20     //software and other kinds of works.
21     //
22     // The licenses for most software and other practical works are designed
23     //to take away your freedom to share and change the works. By contrast,
24     //the GNU General Public License is intended to guarantee your freedom to
25     //share and change all versions of a program--to make sure it remains free
26     //software for all its users. We, the Free Software Foundation, use the
27     //GNU General Public License for most of our software; it applies also to
28     //any other work released this way by its authors. You can apply it to
29     //your programs, too.
30     //
31     // When we speak of free software, we are referring to freedom, not
32     //price. Our General Public Licenses are designed to make sure that you
33     //have the freedom to distribute copies of free software (and charge for
34     //them if you wish), that you receive source code or can get it if you
35     //want it, that you can change the software or use pieces of it in new
36     //free programs, and that you know you can do these things.
37     //
38     // To protect your rights, we need to prevent others from denying you
39     //these rights or asking you to surrender the rights. Therefore, you have
40     //certain responsibilities if you distribute copies of the software, or if
41     //you modify it: responsibilities to respect the freedom of others.
42     //
43     // For example, if you distribute copies of such a program, whether
44     //gratis or for a fee, you must pass on to the recipients the same
45     //freedoms that you received. You must make sure that they, too, receive
46     //or can get the source code. And you must show them these terms so they
47     //know their rights.
48     //
49     // Developers that use the GNU GPL protect your rights with two steps:
50     //(1) assert copyright on the software, and (2) offer you this License
51     //giving you legal permission to copy, distribute and/or modify it.
52     //
53     // For the developers' and authors' protection, the GPL clearly explains
54     //that there is no warranty for this free software. For both users' and
55     //authors' sake, the GPL requires that modified versions be marked as
56     //changed, so that their problems will not be attributed erroneously to
57     //authors of previous versions.
58     //
59     // Some devices are designed to deny users access to install or run
60     //modified versions of the software inside them, although the manufacturer
61     //can do so. This is fundamentally incompatible with the aim of
62     //protecting users' freedom to change the software. The systematic
63     //pattern of such abuse occurs in the area of products for individuals to
64     //use, which is precisely where it is most unacceptable. Therefore, we
65     //have designed this version of the GPL to prohibit the practice for those
66     //products. If such problems arise substantially in other domains, we
67     //stand ready to extend this provision to those domains in future versions
68     //of the GPL, as needed to protect the freedom of users.
69     //
70     // Finally, every program is threatened constantly by software patents.
71     //States should not allow patents to restrict development and use of
72     //software on general-purpose computers, but in those that do, we wish to
73     //avoid the special danger that patents applied to a free program could
74     //make it effectively proprietary. To prevent this, the GPL assures that
75     //patents cannot be used to render the program non-free.
76     //
77     // The precise terms and conditions for copying, distribution and
78     //modification follow.
79     //
80     // TERMS AND CONDITIONS
81     //
82     // 0. Definitions.
83     //
84     // "This License" refers to version 3 of the GNU General Public License.
85     //
86     // "Copyright" also means copyright-like laws that apply to other kinds of
87     //works, such as semiconductor masks.
88     //
89     // "The Program" refers to any copyrightable work licensed under this
90     //License. Each licensee is addressed as "you". "Licensees" and
91     //"recipients" may be individuals or organizations.
92     //
93     // To "modify" a work means to copy from or adapt all or part of the work
94     //in a fashion requiring copyright permission, other than the making of an
95     //exact copy. The resulting work is called a "modified version" of the
96     //earlier work or a work "based on" the earlier work.
97     //
98     // A "covered work" means either the unmodified Program or a work based
99     //on the Program.
100     //
101     // To "propagate" a work means to do anything with it that, without
102     //permission, would make you directly or secondarily liable for
103     //infringement under applicable copyright law, except executing it on a
104     //computer or modifying a private copy. Propagation includes copying,
105     //distribution (with or without modification), making available to the
106     //public, and in some countries other activities as well.
107     //
108     // To "convey" a work means any kind of propagation that enables other
109     //parties to make or receive copies. Mere interaction with a user through
110     //a computer network, with no transfer of a copy, is not conveying.
111     //
112     // An interactive user interface displays "Appropriate Legal Notices"
113     //to the extent that it includes a convenient and prominently visible
114     //feature that (1) displays an appropriate copyright notice, and (2)
115     //tells the user that there is no warranty for the work (except to the
116     //extent that warranties are provided), that licensees may convey the
117     //work under this License, and how to view a copy of this License. If
118     //the interface presents a list of user commands or options, such as a
119     //menu, a prominent item in the list meets this criterion.
120     //
121     // 1. Source Code.
122     //
123     // The "source code" for a work means the preferred form of the work
124     //for making modifications to it. "Object code" means any non-source
125     //form of a work.
126     //
127     // A "Standard Interface" means an interface that either is an official
128     //standard defined by a recognized standards body, or, in the case of
129     //interfaces specified for a particular programming language, one that
130     //is widely used among developers working in that language.
131     //
132     // The "System Libraries" of an executable work include anything, other
133     //than the work as a whole, that (a) is included in the normal form of
134     //packaging a Major Component, but which is not part of that Major
135     //Component, and (b) serves only to enable use of the work with that
136     //Major Component, or to implement a Standard Interface for which an
137     //implementation is available to the public in source code form. A
138     //"Major Component", in this context, means a major essential component
139     //(kernel, window system, and so on) of the specific operating system
140     //(if any) on which the executable work runs, or a compiler used to
141     //produce the work, or an object code interpreter used to run it.
142     //
143     // The "Corresponding Source" for a work in object code form means all
144     //the source code needed to generate, install, and (for an executable
145     //work) run the object code and to modify the work, including scripts to
146     //control those activities. However, it does not include the work's
147     //System Libraries, or general-purpose tools or generally available free
148     //programs which are used unmodified in performing those activities but
149     //which are not part of the work. For example, Corresponding Source
150     //includes interface definition files associated with source files for
151     //the work, and the source code for shared libraries and dynamically
152     //linked subprograms that the work is specifically designed to require,
153     //such as by intimate data communication or control flow between those
154     //subprograms and other parts of the work.
155     //
156     // The Corresponding Source need not include anything that users
157     //can regenerate automatically from other parts of the Corresponding
158     //Source.
159     //
160     // The Corresponding Source for a work in source code form is that
161     //same work.
162     //
163     // 2. Basic Permissions.
164     //
165     // All rights granted under this License are granted for the term of
166     //copyright on the Program, and are irrevocable provided the stated
167     //conditions are met. This License explicitly affirms your unlimited
168     //permission to run the unmodified Program. The output from running a
169     //covered work is covered by this License only if the output, given its
170     //content, constitutes a covered work. This License acknowledges your
171     //rights of fair use or other equivalent, as provided by copyright law.
172     //
173     // You may make, run and propagate covered works that you do not
174     //convey, without conditions so long as your license otherwise remains
175     //in force. You may convey covered works to others for the sole purpose
176     //of having them make modifications exclusively for you, or provide you
177     //with facilities for running those works, provided that you comply with
178     //the terms of this License in conveying all material for which you do
179     //not control copyright. Those thus making or running the covered works
180     //for you must do so exclusively on your behalf, under your direction
181     //and control, on terms that prohibit them from making any copies of
182     //your copyrighted material outside their relationship with you.
183     //
184     // Conveying under any other circumstances is permitted solely under
185     //the conditions stated below. Sublicensing is not allowed; section 10
186     //makes it unnecessary.
187     //
188     // 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
189     //
190     // No covered work shall be deemed part of an effective technological
191     //measure under any applicable law fulfilling obligations under article
192     //11 of the WIPO copyright treaty adopted on 20 December 1996, or
193     //similar laws prohibiting or restricting circumvention of such
194     //measures.
195     //
196     // When you convey a covered work, you waive any legal power to forbid
197     //circumvention of technological measures to the extent such circumvention
198     //is effected by exercising rights under this License with respect to
199     //the covered work, and you disclaim any intention to limit operation or
200     //modification of the work as a means of enforcing, against the work's
201     //users, your or third parties' legal rights to forbid circumvention of
202     //technological measures.
203     //
204     // 4. Conveying Verbatim Copies.
205     //
206     // You may convey verbatim copies of the Program's source code as you
207     //receive it, in any medium, provided that you conspicuously and
208     //appropriately publish on each copy an appropriate copyright notice;
209     //keep intact all notices stating that this License and any
210     //non-permissive terms added in accord with section 7 apply to the code;
211     //keep intact all notices of the absence of any warranty; and give all
212     //recipients a copy of this License along with the Program.
213     //
214     // You may charge any price or no price for each copy that you convey,
215     //and you may offer support or warranty protection for a fee.
216     //
217     // 5. Conveying Modified Source Versions.
218     //
219     // You may convey a work based on the Program, or the modifications to
220     //produce it from the Program, in the form of source code under the
221     //terms of section 4, provided that you also meet all of these conditions:
222     //
223     // a) The work must carry prominent notices stating that you modified
224     // it, and giving a relevant date.
225     //
226     // b) The work must carry prominent notices stating that it is
227     // released under this License and any conditions added under section
228     // 7. This requirement modifies the requirement in section 4 to
229     // "keep intact all notices".
230     //
231     // c) You must license the entire work, as a whole, under this
232     // License to anyone who comes into possession of a copy. This
233     // License will therefore apply, along with any applicable section 7
234     // additional terms, to the whole of the work, and all its parts,
235     // regardless of how they are packaged. This License gives no
236     // permission to license the work in any other way, but it does not
237     // invalidate such permission if you have separately received it.
238     //
239     // d) If the work has interactive user interfaces, each must display
240     // Appropriate Legal Notices; however, if the Program has interactive
241     // interfaces that do not display Appropriate Legal Notices, your
242     // work need not make them do so.
243     //
244     // A compilation of a covered work with other separate and independent
245     //works, which are not by their nature extensions of the covered work,
246     //and which are not combined with it such as to form a larger program,
247     //in or on a volume of a storage or distribution medium, is called an
248     //"aggregate" if the compilation and its resulting copyright are not
249     //used to limit the access or legal rights of the compilation's users
250     //beyond what the individual works permit. Inclusion of a covered work
251     //in an aggregate does not cause this License to apply to the other
252     //parts of the aggregate.
253     //
254     // 6. Conveying Non-Source Forms.
255     //
256     // You may convey a covered work in object code form under the terms
257     //of sections 4 and 5, provided that you also convey the
258     //machine-readable Corresponding Source under the terms of this License,
259     //in one of these ways:
260     //
261     // a) Convey the object code in, or embodied in, a physical product
262     // (including a physical distribution medium), accompanied by the
263     // Corresponding Source fixed on a durable physical medium
264     // customarily used for software interchange.
265     //
266     // b) Convey the object code in, or embodied in, a physical product
267     // (including a physical distribution medium), accompanied by a
268     // written offer, valid for at least three years and valid for as
269     // long as you offer spare parts or customer support for that product
270     // model, to give anyone who possesses the object code either (1) a
271     // copy of the Corresponding Source for all the software in the
272     // product that is covered by this License, on a durable physical
273     // medium customarily used for software interchange, for a price no
274     // more than your reasonable cost of physically performing this
275     // conveying of source, or (2) access to copy the
276     // Corresponding Source from a network server at no charge.
277     //
278     // c) Convey individual copies of the object code with a copy of the
279     // written offer to provide the Corresponding Source. This
280     // alternative is allowed only occasionally and noncommercially, and
281     // only if you received the object code with such an offer, in accord
282     // with subsection 6b.
283     //
284     // d) Convey the object code by offering access from a designated
285     // place (gratis or for a charge), and offer equivalent access to the
286     // Corresponding Source in the same way through the same place at no
287     // further charge. You need not require recipients to copy the
288     // Corresponding Source along with the object code. If the place to
289     // copy the object code is a network server, the Corresponding Source
290     // may be on a different server (operated by you or a third party)
291     // that supports equivalent copying facilities, provided you maintain
292     // clear directions next to the object code saying where to find the
293     // Corresponding Source. Regardless of what server hosts the
294     // Corresponding Source, you remain obligated to ensure that it is
295     // available for as long as needed to satisfy these requirements.
296     //
297     // e) Convey the object code using peer-to-peer transmission, provided
298     // you inform other peers where the object code and Corresponding
299     // Source of the work are being offered to the general public at no
300     // charge under subsection 6d.
301     //
302     // A separable portion of the object code, whose source code is excluded
303     //from the Corresponding Source as a System Library, need not be
304     //included in conveying the object code work.
305     //
306     // A "User Product" is either (1) a "consumer product", which means any
307     //tangible personal property which is normally used for personal, family,
308     //or household purposes, or (2) anything designed or sold for incorporation
309     //into a dwelling. In determining whether a product is a consumer product,
310     //doubtful cases shall be resolved in favor of coverage. For a particular
311     //product received by a particular user, "normally used" refers to a
312     //typical or common use of that class of product, regardless of the status
313     //of the particular user or of the way in which the particular user
314     //actually uses, or expects or is expected to use, the product. A product
315     //is a consumer product regardless of whether the product has substantial
316     //commercial, industrial or non-consumer uses, unless such uses represent
317     //the only significant mode of use of the product.
318     //
319     // "Installation Information" for a User Product means any methods,
320     //procedures, authorization keys, or other information required to install
321     //and execute modified versions of a covered work in that User Product from
322     //a modified version of its Corresponding Source. The information must
323     //suffice to ensure that the continued functioning of the modified object
324     //code is in no case prevented or interfered with solely because
325     //modification has been made.
326     //
327     // If you convey an object code work under this section in, or with, or
328     //specifically for use in, a User Product, and the conveying occurs as
329     //part of a transaction in which the right of possession and use of the
330     //User Product is transferred to the recipient in perpetuity or for a
331     //fixed term (regardless of how the transaction is characterized), the
332     //Corresponding Source conveyed under this section must be accompanied
333     //by the Installation Information. But this requirement does not apply
334     //if neither you nor any third party retains the ability to install
335     //modified object code on the User Product (for example, the work has
336     //been installed in ROM).
337     //
338     // The requirement to provide Installation Information does not include a
339     //requirement to continue to provide support service, warranty, or updates
340     //for a work that has been modified or installed by the recipient, or for
341     //the User Product in which it has been modified or installed. Access to a
342     //network may be denied when the modification itself materially and
343     //adversely affects the operation of the network or violates the rules and
344     //protocols for communication across the network.
345     //
346     // Corresponding Source conveyed, and Installation Information provided,
347     //in accord with this section must be in a format that is publicly
348     //documented (and with an implementation available to the public in
349     //source code form), and must require no special password or key for
350     //unpacking, reading or copying.
351     //
352     // 7. Additional Terms.
353     //
354     // "Additional permissions" are terms that supplement the terms of this
355     //License by making exceptions from one or more of its conditions.
356     //Additional permissions that are applicable to the entire Program shall
357     //be treated as though they were included in this License, to the extent
358     //that they are valid under applicable law. If additional permissions
359     //apply only to part of the Program, that part may be used separately
360     //under those permissions, but the entire Program remains governed by
361     //this License without regard to the additional permissions.
362     //
363     // When you convey a copy of a covered work, you may at your option
364     //remove any additional permissions from that copy, or from any part of
365     //it. (Additional permissions may be written to require their own
366     //removal in certain cases when you modify the work.) You may place
367     //additional permissions on material, added by you to a covered work,
368     //for which you have or can give appropriate copyright permission.
369     //
370     // Notwithstanding any other provision of this License, for material you
371     //add to a covered work, you may (if authorized by the copyright holders of
372     //that material) supplement the terms of this License with terms:
373     //
374     // a) Disclaiming warranty or limiting liability differently from the
375     // terms of sections 15 and 16 of this License; or
376     //
377     // b) Requiring preservation of specified reasonable legal notices or
378     // author attributions in that material or in the Appropriate Legal
379     // Notices displayed by works containing it; or
380     //
381     // c) Prohibiting misrepresentation of the origin of that material, or
382     // requiring that modified versions of such material be marked in
383     // reasonable ways as different from the original version; or
384     //
385     // d) Limiting the use for publicity purposes of names of licensors or
386     // authors of the material; or
387     //
388     // e) Declining to grant rights under trademark law for use of some
389     // trade names, trademarks, or service marks; or
390     //
391     // f) Requiring indemnification of licensors and authors of that
392     // material by anyone who conveys the material (or modified versions of
393     // it) with contractual assumptions of liability to the recipient, for
394     // any liability that these contractual assumptions directly impose on
395     // those licensors and authors.
396     //
397     // All other non-permissive additional terms are considered "further
398     //restrictions" within the meaning of section 10. If the Program as you
399     //received it, or any part of it, contains a notice stating that it is
400     //governed by this License along with a term that is a further
401     //restriction, you may remove that term. If a license document contains
402     //a further restriction but permits relicensing or conveying under this
403     //License, you may add to a covered work material governed by the terms
404     //of that license document, provided that the further restriction does
405     //not survive such relicensing or conveying.
406     //
407     // If you add terms to a covered work in accord with this section, you
408     //must place, in the relevant source files, a statement of the
409     //additional terms that apply to those files, or a notice indicating
410     //where to find the applicable terms.
411     //
412     // Additional terms, permissive or non-permissive, may be stated in the
413     //form of a separately written license, or stated as exceptions;
414     //the above requirements apply either way.
415     //
416     // 8. Termination.
417     //
418     // You may not propagate or modify a covered work except as expressly
419     //provided under this License. Any attempt otherwise to propagate or
420     //modify it is void, and will automatically terminate your rights under
421     //this License (including any patent licenses granted under the third
422     //paragraph of section 11).
423     //
424     // However, if you cease all violation of this License, then your
425     //license from a particular copyright holder is reinstated (a)
426     //provisionally, unless and until the copyright holder explicitly and
427     //finally terminates your license, and (b) permanently, if the copyright
428     //holder fails to notify you of the violation by some reasonable means
429     //prior to 60 days after the cessation.
430     //
431     // Moreover, your license from a particular copyright holder is
432     //reinstated permanently if the copyright holder notifies you of the
433     //violation by some reasonable means, this is the first time you have
434     //received notice of violation of this License (for any work) from that
435     //copyright holder, and you cure the violation prior to 30 days after
436     //your receipt of the notice.
437     //
438     // Termination of your rights under this section does not terminate the
439     //licenses of parties who have received copies or rights from you under
440     //this License. If your rights have been terminated and not permanently
441     //reinstated, you do not qualify to receive new licenses for the same
442     //material under section 10.
443     //
444     // 9. Acceptance Not Required for Having Copies.
445     //
446     // You are not required to accept this License in order to receive or
447     //run a copy of the Program. Ancillary propagation of a covered work
448     //occurring solely as a consequence of using peer-to-peer transmission
449     //to receive a copy likewise does not require acceptance. However,
450     //nothing other than this License grants you permission to propagate or
451     //modify any covered work. These actions infringe copyright if you do
452     //not accept this License. Therefore, by modifying or propagating a
453     //covered work, you indicate your acceptance of this License to do so.
454     //
455     // 10. Automatic Licensing of Downstream Recipients.
456     //
457     // Each time you convey a covered work, the recipient automatically
458     //receives a license from the original licensors, to run, modify and
459     //propagate that work, subject to this License. You are not responsible
460     //for enforcing compliance by third parties with this License.
461     //
462     // An "entity transaction" is a transaction transferring control of an
463     //organization, or substantially all assets of one, or subdividing an
464     //organization, or merging organizations. If propagation of a covered
465     //work results from an entity transaction, each party to that
466     //transaction who receives a copy of the work also receives whatever
467     //licenses to the work the party's predecessor in interest had or could
468     //give under the previous paragraph, plus a right to possession of the
469     //Corresponding Source of the work from the predecessor in interest, if
470     //the predecessor has it or can get it with reasonable efforts.
471     //
472     // You may not impose any further restrictions on the exercise of the
473     //rights granted or affirmed under this License. For example, you may
474     //not impose a license fee, royalty, or other charge for exercise of
475     //rights granted under this License, and you may not initiate litigation
476     //(including a cross-claim or counterclaim in a lawsuit) alleging that
477     //any patent claim is infringed by making, using, selling, offering for
478     //sale, or importing the Program or any portion of it.
479     //
480     // 11. Patents.
481     //
482     // A "contributor" is a copyright holder who authorizes use under this
483     //License of the Program or a work on which the Program is based. The
484     //work thus licensed is called the contributor's "contributor version".
485     //
486     // A contributor's "essential patent claims" are all patent claims
487     //owned or controlled by the contributor, whether already acquired or
488     //hereafter acquired, that would be infringed by some manner, permitted
489     //by this License, of making, using, or selling its contributor version,
490     //but do not include claims that would be infringed only as a
491     //consequence of further modification of the contributor version. For
492     //purposes of this definition, "control" includes the right to grant
493     //patent sublicenses in a manner consistent with the requirements of
494     //this License.
495     //
496     // Each contributor grants you a non-exclusive, worldwide, royalty-free
497     //patent license under the contributor's essential patent claims, to
498     //make, use, sell, offer for sale, import and otherwise run, modify and
499     //propagate the contents of its contributor version.
500     //
501     // In the following three paragraphs, a "patent license" is any express
502     //agreement or commitment, however denominated, not to enforce a patent
503     //(such as an express permission to practice a patent or covenant not to
504     //sue for patent infringement). To "grant" such a patent license to a
505     //party means to make such an agreement or commitment not to enforce a
506     //patent against the party.
507     //
508     // If you convey a covered work, knowingly relying on a patent license,
509     //and the Corresponding Source of the work is not available for anyone
510     //to copy, free of charge and under the terms of this License, through a
511     //publicly available network server or other readily accessible means,
512     //then you must either (1) cause the Corresponding Source to be so
513     //available, or (2) arrange to deprive yourself of the benefit of the
514     //patent license for this particular work, or (3) arrange, in a manner
515     //consistent with the requirements of this License, to extend the patent
516     //license to downstream recipients. "Knowingly relying" means you have
517     //actual knowledge that, but for the patent license, your conveying the
518     //covered work in a country, or your recipient's use of the covered work
519     //in a country, would infringe one or more identifiable patents in that
520     //country that you have reason to believe are valid.
521     //
522     // If, pursuant to or in connection with a single transaction or
523     //arrangement, you convey, or propagate by procuring conveyance of, a
524     //covered work, and grant a patent license to some of the parties
525     //receiving the covered work authorizing them to use, propagate, modify
526     //or convey a specific copy of the covered work, then the patent license
527     //you grant is automatically extended to all recipients of the covered
528     //work and works based on it.
529     //
530     // A patent license is "discriminatory" if it does not include within
531     //the scope of its coverage, prohibits the exercise of, or is
532     //conditioned on the non-exercise of one or more of the rights that are
533     //specifically granted under this License. You may not convey a covered
534     //work if you are a party to an arrangement with a third party that is
535     //in the business of distributing software, under which you make payment
536     //to the third party based on the extent of your activity of conveying
537     //the work, and under which the third party grants, to any of the
538     //parties who would receive the covered work from you, a discriminatory
539     //patent license (a) in connection with copies of the covered work
540     //conveyed by you (or copies made from those copies), or (b) primarily
541     //for and in connection with specific products or compilations that
542     //contain the covered work, unless you entered into that arrangement,
543     //or that patent license was granted, prior to 28 March 2007.
544     //
545     // Nothing in this License shall be construed as excluding or limiting
546     //any implied license or other defenses to infringement that may
547     //otherwise be available to you under applicable patent law.
548     //
549     // 12. No Surrender of Others' Freedom.
550     //
551     // If conditions are imposed on you (whether by court order, agreement or
552     //otherwise) that contradict the conditions of this License, they do not
553     //excuse you from the conditions of this License. If you cannot convey a
554     //covered work so as to satisfy simultaneously your obligations under this
555     //License and any other pertinent obligations, then as a consequence you may
556     //not convey it at all. For example, if you agree to terms that obligate you
557     //to collect a royalty for further conveying from those to whom you convey
558     //the Program, the only way you could satisfy both those terms and this
559     //License would be to refrain entirely from conveying the Program.
560     //
561     // 13. Use with the GNU Affero General Public License.
562     //
563     // Notwithstanding any other provision of this License, you have
564     //permission to link or combine any covered work with a work licensed
565     //under version 3 of the GNU Affero General Public License into a single
566     //combined work, and to convey the resulting work. The terms of this
567     //License will continue to apply to the part which is the covered work,
568     //but the special requirements of the GNU Affero General Public License,
569     //section 13, concerning interaction through a network will apply to the
570     //combination as such.
571     //
572     // 14. Revised Versions of this License.
573     //
574     // The Free Software Foundation may publish revised and/or new versions of
575     //the GNU General Public License from time to time. Such new versions will
576     //be similar in spirit to the present version, but may differ in detail to
577     //address new problems or concerns.
578     //
579     // Each version is given a distinguishing version number. If the
580     //Program specifies that a certain numbered version of the GNU General
581     //Public License "or any later version" applies to it, you have the
582     //option of following the terms and conditions either of that numbered
583     //version or of any later version published by the Free Software
584     //Foundation. If the Program does not specify a version number of the
585     //GNU General Public License, you may choose any version ever published
586     //by the Free Software Foundation.
587     //
588     // If the Program specifies that a proxy can decide which future
589     //versions of the GNU General Public License can be used, that proxy's
590     //public statement of acceptance of a version permanently authorizes you
591     //to choose that version for the Program.
592     //
593     // Later license versions may give you additional or different
594     //permissions. However, no additional obligations are imposed on any
595     //author or copyright holder as a result of your choosing to follow a
596     //later version.
597     //
598     // 15. Disclaimer of Warranty.
599     //
600     // THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
601     //APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
602     //HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
603     //OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
604     //THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
605     //PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
606     //IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
607     //ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
608     //
609     // 16. Limitation of Liability.
610     //
611     // IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
612     //WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
613     //THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
614     //GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
615     //USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
616     //DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
617     //PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
618     //EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
619     //SUCH DAMAGES.
620     //
621     // 17. Interpretation of Sections 15 and 16.
622     //
623     // If the disclaimer of warranty and limitation of liability provided
624     //above cannot be given local legal effect according to their terms,
625     //reviewing courts shall apply local law that most closely approximates
626     //an absolute waiver of all civil liability in connection with the
627     //Program, unless a warranty or assumption of liability accompanies a
628     //copy of the Program in return for a fee.
629     //
630     // END OF TERMS AND CONDITIONS
631     //
632     // How to Apply These Terms to Your New Programs
633     //
634     // If you develop a new program, and you want it to be of the greatest
635     //possible use to the public, the best way to achieve this is to make it
636     //free software which everyone can redistribute and change under these terms.
637     //
638     // To do so, attach the following notices to the program. It is safest
639     //to attach them to the start of each source file to most effectively
640     //state the exclusion of warranty; and each file should have at least
641     //the "copyright" line and a pointer to where the full notice is found.
642     //
643     // <one line to give the program's name and a brief idea of what it does.>
644     // Copyright (C) <year> <name of author>
645     //
646     // This program is free software: you can redistribute it and/or modify
647     // it under the terms of the GNU General Public License as published by
648     // the Free Software Foundation, either version 3 of the License, or
649     // (at your option) any later version.
650     //
651     // This program is distributed in the hope that it will be useful,
652     // but WITHOUT ANY WARRANTY; without even the implied warranty of
653     // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
654     // GNU General Public License for more details.
655     //
656     // You should have received a copy of the GNU General Public License
657     // along with this program. If not, see <http://www.gnu.org/licenses/>.
658     //
659     //Also add information on how to contact you by electronic and paper mail.
660     //
661     // If the program does terminal interaction, make it output a short
662     //notice like this when it starts in an interactive mode:
663     //
664     // <program> Copyright (C) <year> <name of author>
665     // This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
666     // This is free software, and you are welcome to redistribute it
667     // under certain conditions; type `show c' for details.
668     //
669     //The hypothetical commands `show w' and `show c' should show the appropriate
670     //parts of the General Public License. Of course, your program's commands
671     //might be different; for a GUI interface, you would use an "about box".
672     //
673     // You should also get your employer (if you work as a programmer) or school,
674     //if any, to sign a "copyright disclaimer" for the program, if necessary.
675     //For more information on this, and how to apply and follow the GNU GPL, see
676     //<http://www.gnu.org/licenses/>.
677     //
678     // The GNU General Public License does not permit incorporating your program
679     //into proprietary programs. If your program is a subroutine library, you
680     //may consider it more useful to permit linking proprietary applications with
681     //the library. If this is what you want to do, use the GNU Lesser General
682     //Public License instead of this License. But first, please read
683     //<http://www.gnu.org/philosophy/why-not-lgpl.html>.
684     //
685     //----------------------------------------------------------------------------------------------------
686     //
687     //All paths in the SHA512 file must be absolute or must be relative to the current working directory
688     //at the time this program is run.
689     //
690     //If any files are deleted by the program, a new SHA512 file must be generated before the program is
691     //run again to delete files. The reason for this restriction is that the program will never knowingly
692     //delete the last copy of a file. If the SHA512 file contains the digests of files that no longer
693     //exist, the program may unknowingly delete the last copies of files (because it believes based on
694     //the SHA512 file that other copies exist when in fact they do not).
695     //
696     //A typical method of generating an SHA512 file is to use "find <dir> -type f -exec sha512sum -b {} + > <sha512file>".
697     //
698     //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
699     //
700     //This program is designed to compile and run under Cygwin or *nix only.
701     //
702     //Usage:
703     // qdedup
704     // Prints help information and exits.
705     // qdedup ndups <sha512file>
706     // Prints statistics about the number of duplicates in <sha512file>.
707     // qdedup filterdups <sha512file>
708     // Analyzes duplicates and prints the filenames of groups of duplicates. The output is designed
709     // for hand analysis so that insight can be gained into what duplicates exist and where they
710     // are located.
711     // qdedup dedup <sha512file> <path>
712     // For each group of duplicates that exists, preserves the duplicates that exist within path
713     // and removes all others. If no copies of the duplicate exist within path, no copies of the
714     // duplicate will be removed.
715     // qdedup dedupnopath <sha512file>
716     // For each group of duplicates that exists, preserves only the first (the one with lowest
717     // sort-order filename).
718     // qdedup dryrun <sha512file> <path>
719     // Prints output indicating which files the program would remove if run with the "dedup" option,
720     // but does not remove any files.
721     // qdedup dryrunnopath <sha512file>
722     // Prints output indicating which files the program would remove if run with the "dedupnopath"
723     // option, but does not remove any files.
724     //----------------------------------------------------------------------------------------------------
725     #include <math.h>
726     #include <stdio.h>
727     #include <stdlib.h>
728     #include <string.h>
729     #include <time.h>
730     #include <unistd.h>
731     //----------------------------------------------------------------------------------------------------
//Number of printable characters in a line (the width of the horizontal rules that are printed).
#define LINELEN         (78)
//The maximum number of characters that may be in a line of the SHA512 input file.  This count
//includes the \0 terminator, so only this value minus 1 characters may be in a line.
#define MAXLINELEN      (2000)
//Number of seconds to pause between file unlinks (deletions).  This is designed to give the user
//time to abort the program if desired before catastrophic quantities of files are deleted.
#define UNLINKPAUSETIME (0.1)
739     //----------------------------------------------------------------------------------------------------
//Data structure that holds the character representation of an SHA512 hash, plus the filename
//that was specified alongside it.
typedef struct
{
   //Hash as text:  512/4 = 128 hex characters, plus 1 character for the zero terminator.
   char hash[128 + 1];
   //Filename as specified in the file, allocated via the malloc() family.
   char *fname;
} tFileHashRecord;
749     //----------------------------------------------------------------------------------------------------
750     //----------------------------------------------------------------------------------------------------
751     //----- CHARACTER CLASSIFICATION FUNCTIONS ---------------------------------------------------------
752     //----------------------------------------------------------------------------------------------------
753     //----------------------------------------------------------------------------------------------------
//Returns 1 if c may appear in the text form of a hash (a decimal digit or a lowercase hex
//letter 'a' through 'f'), or 0 otherwise.  Uppercase hex letters are not accepted.
int is_valid_hash_char(char c)
{
   if ((c >= '0') && (c <= '9'))
   {
      return(1);
   }

   if ((c >= 'a') && (c <= 'f'))
   {
      return(1);
   }

   return(0);
}
782     //----------------------------------------------------------------------------------------------------
//Returns 1 if c is one of the characters that can make up a newline sequence, i.e. a line feed
//(10) or a carriage return (13), or 0 otherwise.
int is_newline_sequence_char(char c)
{
   return(((c == 10) || (c == 13)) ? 1 : 0);
}
797    
798     //----------------------------------------------------------------------------------------------------
799     //----------------------------------------------------------------------------------------------------
800     //----- FORMATTED OUTPUT FUNCTIONS -----------------------------------------------------------------
801     //----------------------------------------------------------------------------------------------------
802     //----------------------------------------------------------------------------------------------------
//Writes the character c to the stream s exactly n times.  Nothing is written when n is 0.
//
void stream_rep_char(FILE *s, char c, unsigned n)
{
   unsigned remaining;

   for (remaining = n; remaining != 0; remaining--)
   {
      fputc(c, s);
   }
}
812     //----------------------------------------------------------------------------------------------------
813     //Prints a horizontal line to a stream, including the newline.
814     //
815     void stream_hline(FILE *s)
816     {
817     stream_rep_char(s, '-', LINELEN);
818     fprintf(s, "\n");
819     }
820     //----------------------------------------------------------------------------------------------------
//Writes a horizontal rule, including the newline, to stdout.
//
void stdout_hline(void)
{
   stream_hline(stdout);
}
828     //----------------------------------------------------------------------------------------------------
829     //----------------------------------------------------------------------------------------------------
830     //----- FATAL ERROR FUNCTIONS ----------------------------------------------------------------------
831     //----------------------------------------------------------------------------------------------------
832     //----------------------------------------------------------------------------------------------------
//Reports a fatal error and terminates the program.
//
//The description desc, the source file name file and the source line number line are printed to
//stdout between two horizontal rules, after which the process exits with status 1.  This
//function does not return.
//
void fatal(const char *desc, const char *file, unsigned line)
{
   stdout_hline();
   printf("Fatal error: %s\n"
          "Source file: %s\n"
          "Line : %u\n",
          desc, file, line);
   stdout_hline();

   exit(1);
}
844     //----------------------------------------------------------------------------------------------------
845     //----------------------------------------------------------------------------------------------------
846     //----- MEMORY ALLOCATION WRAPPERS -----------------------------------------------------------------
847     //----------------------------------------------------------------------------------------------------
848     //----------------------------------------------------------------------------------------------------
//malloc() wrapper.
//
//Allocates nbytes bytes and fills them with zeros.  A request for 0 bytes, or a failure of the
//underlying malloc(), is reported through fatal(), which terminates the program.  The pointer
//returned is therefore never NULL.  The block comes from malloc().
void *w_malloc(size_t nbytes)
{
   void *rv;

   if (!nbytes)
   {
      fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
   }

   rv = malloc(nbytes);

   if (!rv)
   {
      fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
   }

   //Zero out, just for consistency.
   memset(rv, 0, nbytes);

   //The block must be handed back explicitly.  Falling off the end of a non-void function
   //leaves the caller with an indeterminate pointer.
   return(rv);
}
869     //----------------------------------------------------------------------------------------------------
//realloc() wrapper.
//
//Resizes the block p to n bytes and returns the (possibly moved) block.  A request for 0 bytes,
//a NULL p, or a failure of the underlying realloc() is reported through fatal(), which
//terminates the program.  The pointer returned is therefore never NULL.  Unlike w_malloc(), any
//newly added bytes are not zeroed.
void *w_realloc(void *p, size_t n)
{
   void *rv;

   if (!n)
   {
      fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
   }

   if (!p)
   {
      fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
   }

   rv = realloc(p, n);

   if (!rv)
   {
      fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
   }

   //The block may have moved, so the caller must receive the new address.  Falling off the end
   //of a non-void function leaves the caller with an indeterminate pointer.
   return(rv);
}
892     //----------------------------------------------------------------------------------------------------
893     //----------------------------------------------------------------------------------------------------
894     //----- SLEEP FUNCTIONS ----------------------------------------------------------------------------
895     //----------------------------------------------------------------------------------------------------
896     //----------------------------------------------------------------------------------------------------
//Suspends the caller for the given time, in seconds (fractions are allowed).
//
//A negative time or a time of more than 3600 seconds is reported through fatal().  The result of
//nanosleep() is not examined.
void w_sleep(double seconds)
{
   struct timespec delay;
   double whole_seconds;

   if (seconds < 0)
   {
      fatal("Sleep for negative time request.", __FILE__, __LINE__);
   }
   else if (seconds > 3600)
   {
      fatal("Sleep for too long request.", __FILE__, __LINE__);
   }

   //Split the request into whole seconds and the remaining fraction, in nanoseconds.
   whole_seconds = floor(seconds);
   delay.tv_sec  = whole_seconds;
   delay.tv_nsec = (seconds - whole_seconds) * 1E9;

   nanosleep(&delay, NULL);
}
916     //----------------------------------------------------------------------------------------------------
917     //----------------------------------------------------------------------------------------------------
918     //----- SHA512 FIELD READ FUNCTIONS ----------------------------------------------------------------
919     //----------------------------------------------------------------------------------------------------
920     //----------------------------------------------------------------------------------------------------
921     //These functions read in an individual field of a standard SHA512 file generated using application
922     //of the standard sha512sum program.
923     //
924     //*rcode = 1, success.
925     // 0, legal end of file, record assigned.
926     void get_sha512file_line(FILE *s, int *rcode, tFileHashRecord *hash_rec)
927     {
928     unsigned bidx;
929     unsigned nchars;
930     int ic;
931     int exitflag;
932     int eoffound;
933     int eolfound;
934     char c;
935     char buf[MAXLINELEN];
936    
937     //Zero out the buffer. This handles string termination automatically.
938     memset(buf, 0, sizeof(buf));
939    
940     //Read characters into the buffer until either hit EOF, newline, or can't
941     //fill the buffer any longer.
942     eoffound = 0;
943     eolfound = 0;
944     exitflag = 0;
945     bidx = 0;
946     do
947     {
948     ic = fgetc(s);
949     c = ic;
950    
951     if (ic == EOF)
952     {
953     eoffound = 1;
954     eolfound = 0;
955     nchars = bidx;
956     exitflag = 1;
957     }
958     else if (is_newline_sequence_char(c))
959     {
960     eoffound = 0;
961     eolfound = 1;
962     nchars = bidx;
963     exitflag = 1;
964     }
965     else if (bidx >= (MAXLINELEN - 1))
966     {
967     fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
968     }
969     else
970     {
971     buf[bidx] = c;
972     bidx++;
973     exitflag = 0;
974     }
975     } while(! exitflag);
976    
977     //If we encountered a newline, inch past it. We may encounter an EOF.
978     if (eolfound)
979     {
980     exitflag = 0;
981     do
982     {
983     ic = fgetc(s);
984     c = ic;
985    
986     if (ic == EOF)
987     {
988     eoffound = 1;
989     eolfound = 0;
990     exitflag = 1;
991     }
992     else if (is_newline_sequence_char(c))
993     {
994     exitflag = 0;
995     }
996     else
997     {
998     //We hit the next line. Put the character back.
999     eoffound = 0;
1000     eolfound = 1;
1001     ungetc(ic, s);
1002     exitflag = 1;
1003     }
1004     } while(! exitflag);
1005     }
1006    
1007     //For better or worse, we have a \0-terminated line in the buffer.
1008     //
1009     //Zero the caller's area. This takes care of the hash terminator as well.
1010     memset(hash_rec, 0, sizeof(*hash_rec));
1011    
1012     //Ensure that we have at least 128 characters, and they are all hex characters.
1013     //Otherwise, we can't proceed.
1014     if (nchars < 128)
1015     {
1016     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
1017     }
1018     else
1019     {
1020     for (bidx = 0; bidx < 128; bidx++)
1021     {
1022     if (! is_valid_hash_char(buf[bidx]))
1023     {
1024     fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
1025     }
1026     }
1027     }
1028    
1029     //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
1030     if (nchars < 130)
1031     {
1032     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
1033     }
1034     else if (buf[128] != ' ')
1035     {
1036     fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
1037     }
1038     else if (buf[129] != '*')
1039     {
1040     fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
1041     }
1042    
1043     //There must be a 131'st character. Beyond that, we can't qualify, because filenames may
1044     //have odd characters and may be of any length.
1045     if (nchars < 131)
1046     {
1047     fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
1048     }
1049    
1050     //Copy the hash to the caller's area. The terminator has already been inserted.
1051     memcpy(&(hash_rec->hash[0]), buf, 128);
1052    
1053     //Allocate space for the filename.
1054     hash_rec->fname = w_malloc(strlen(buf+130) + 1);
1055    
1056     //Make the copy.
1057     strcpy(hash_rec->fname, buf+130);
1058    
1059     if (eoffound)
1060     *rcode = 0;
1061     else
1062     *rcode = 1;
1063     }
1064     //----------------------------------------------------------------------------------------------------
1065     void parseinputfile(tFileHashRecord **parsed_recs, unsigned *count, char *fname)
1066     {
1067     FILE *s;
1068     int rcode;
1069    
1070     //Try to open the file for reading. Inability is a failure.
1071     s = fopen(fname, "r");
1072     if (!s)
1073     {
1074     fatal("Hash file open failure.", __FILE__, __LINE__);
1075     }
1076    
1077     //Start off with a count of 0 and a NULL pointer.
1078     *count = 0;
1079     *parsed_recs = NULL;
1080    
1081     do
1082     {
1083     //For the first time, allocate space for one record. Beyond that,
1084     //expand it.
1085     if (! *parsed_recs)
1086     {
1087     *parsed_recs = w_malloc(sizeof(tFileHashRecord));
1088     }
1089     else
1090     {
1091     *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
1092     }
1093    
1094     //Parse and fill in the space.
1095     get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
1096    
1097     //We now have one more.
1098     (*count)++;
1099     } while(rcode == 1);
1100    
1101     //Try to close the file. Inability is a failure.
1102     if (fclose(s))
1103     {
1104     fatal("Hash file close failure.", __FILE__, __LINE__);
1105     }
1106     }
1107     //----------------------------------------------------------------------------------------------------
1108     int sortcmpascendinghash(const void *p0_in, const void *p1_in)
1109     {
1110     const tFileHashRecord *p0, *p1;
1111    
1112     p0 = p0_in;
1113     p1 = p1_in;
1114    
1115     return(strcmp(p0->hash, p1->hash));
1116     }
1117    
1118     //----------------------------------------------------------------------------------------------------
1119     void sortinternaldsbyhash(tFileHashRecord *parsed_recs, unsigned count)
1120     {
1121     qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
1122     }
1123     //----------------------------------------------------------------------------------------------------
1124     int sortcmpascendingfname(const void *p0_in, const void *p1_in)
1125     {
1126     const tFileHashRecord *p0, *p1;
1127    
1128     p0 = p0_in;
1129     p1 = p1_in;
1130    
1131     return(strcmp(p0->fname, p1->fname));
1132     }
1133     //----------------------------------------------------------------------------------------------------
1134     //This sort has to be run after the hash sort. Within groups of identical hashes, it sorts by
1135     //ascending filename.
1136     void sortinternalgroupfname(tFileHashRecord *parsed_recs, unsigned count)
1137     {
1138     unsigned ui;
1139     unsigned i_group_min, i_group_max;
1140    
1141     if (! count)
1142     return;
1143    
1144     i_group_min = 0;
1145     i_group_max = 0;
1146    
1147     do
1148     {
1149     //Advance i_group_max to the end of the group of duplicates.
1150     while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
1151     {
1152     i_group_max++;
1153     }
1154    
1155     if (i_group_min != i_group_max)
1156     {
1157     //Sort the internal group.
1158     qsort(parsed_recs + i_group_min,
1159     i_group_max - i_group_min + 1,
1160     sizeof(tFileHashRecord),
1161     sortcmpascendingfname);
1162     }
1163    
1164     //On to the next group.
1165     i_group_max++;
1166     i_group_min = i_group_max;
1167    
1168     } while (i_group_max < (count - 1));
1169     }
1170     //----------------------------------------------------------------------------------------------------
1171     void printsinglerecord(tFileHashRecord *rec, unsigned elno)
1172     {
1173     printf("[%9u]\n", elno);
1174     printf("Hash : %s\n", rec->hash);
1175     printf("Filename : %s\n", rec->fname);
1176     stdout_hline();
1177     }
1178     //----------------------------------------------------------------------------------------------------
1179     void printinternalds(tFileHashRecord *parsed_recs, unsigned count)
1180     {
1181     unsigned i;
1182    
1183     for (i=0; i<count; i++)
1184     {
1185     printsinglerecord(parsed_recs + i, i);
1186     }
1187     }
1188     //----------------------------------------------------------------------------------------------------
1189     void gather_dup_stats(tFileHashRecord *parsed_recs, unsigned count, unsigned *out_num_dups, unsigned *out_cumulative_dups)
1190     {
1191     unsigned i_group_min, i_group_max;
1192    
1193     *out_num_dups = 0;
1194     *out_cumulative_dups = 0;
1195    
1196     if (! count)
1197     return;
1198    
1199     i_group_min = 0;
1200     i_group_max = 0;
1201    
1202     do
1203     {
1204     //Advance i_group_max to the end of the group of duplicates.
1205     while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
1206     {
1207     i_group_max++;
1208     }
1209    
1210     //Log the findings.
1211     if (i_group_min != i_group_max)
1212     {
1213     (*out_num_dups)++;
1214     (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
1215     }
1216    
1217     //On to the next group.
1218     i_group_max++;
1219     i_group_min = i_group_max;
1220    
1221     } while (i_group_max < (count - 1));
1222     }
1223     //----------------------------------------------------------------------------------------------------
1224     void option_dups(char *fname)
1225     {
1226     tFileHashRecord *parsed_recs;
1227     unsigned count, num_dups, cumulative_dups;
1228    
1229     parseinputfile(&parsed_recs, &count, fname);
1230     //printf("%u records parsed.\n", count);
1231     sortinternaldsbyhash(parsed_recs, count);
1232     sortinternalgroupfname(parsed_recs, count);
1233     printinternalds(parsed_recs, count);
1234     stdout_hline();
1235     gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
1236     printf("Number of duplicated files : %u\n", num_dups);
1237     if (num_dups)
1238     {
1239     printf("Average number of duplicates: %.2f\n", (double)cumulative_dups/(double)num_dups);
1240     }
1241     }
1242     //----------------------------------------------------------------------------------------------------
1243     void option_filterdups(char *fname)
1244     {
1245     tFileHashRecord *parsed_recs;
1246     unsigned dupgroup;
1247     unsigned count;
1248     unsigned ui;
1249     unsigned i_group_min, i_group_max;
1250    
1251     parseinputfile(&parsed_recs, &count, fname);
1252     //printf("%u records parsed.\n", count);
1253     sortinternaldsbyhash(parsed_recs, count);
1254     sortinternalgroupfname(parsed_recs, count);
1255    
1256     if (! count)
1257     return;
1258    
1259     dupgroup = 0;
1260     i_group_min = 0;
1261     i_group_max = 0;
1262    
1263     do
1264     {
1265     //Advance i_group_max to the end of the group of duplicates.
1266     while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
1267     {
1268     i_group_max++;
1269     }
1270    
1271     //Print the findings.
1272     if (i_group_min != i_group_max)
1273     {
1274     printf("Duplicate group %u:\n", dupgroup);
1275     for (ui = i_group_min; ui <= i_group_max; ui++)
1276     {
1277     printf("%s\n", parsed_recs[ui].fname);
1278     }
1279    
1280     dupgroup++;
1281    
1282     stdout_hline();
1283     }
1284    
1285     //On to the next group.
1286     i_group_max++;
1287     i_group_min = i_group_max;
1288    
1289     } while (i_group_max < (count - 1));
1290     }
1291     //----------------------------------------------------------------------------------------------------
//Returns 1 if the filename is within the specified path, or 0 otherwise.
//
//"Within" is a pure string test: fname must be strictly longer than path and its leading
//strlen(path) bytes must be identical to path. No filesystem access or path normalization is done.
//
//fname : filename to test; must not be empty (fatal() is called if it is).
//path  : directory prefix; must not be empty and must end with '/' (fatal() is called otherwise).
//
//Every fatal() call is followed by an explicit "return 0" so that no control path falls off the end
//of this non-void function. NOTE(review): fatal() presumably does not return, in which case these
//returns are unreachable--confirm against its definition.
int is_path_member(const char *fname, const char *path)
{
   //Measure each string once rather than on every comparison.
   size_t fname_len = strlen(fname);
   size_t path_len  = strlen(path);

   if (fname_len == 0)
   {
      fatal("Zero-length filename.", __FILE__, __LINE__);
      return 0;
   }

   if (path_len == 0)
   {
      fatal("Zero-length path.", __FILE__, __LINE__);
      return 0;
   }

   if (path[path_len - 1] != '/')
   {
      fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
      return 0;
   }

   if (fname_len <= path_len)
   {
      //Can't be in the path because filename is not longer than path name.
      return 0;
   }

   //In the path exactly when the filename starts with the path.
   return (memcmp(fname, path, path_len) == 0);
}
1321     //----------------------------------------------------------------------------------------------------
1322     void option_dedup(char *fname, char *path, int may_delete, double pause_time)
1323     {
1324     tFileHashRecord *parsed_recs;
1325     unsigned dupgroup;
1326     unsigned count;
1327     unsigned ui;
1328     unsigned within_path;
1329     unsigned i_group_min, i_group_max;
1330    
1331     parseinputfile(&parsed_recs, &count, fname);
1332     //printf("%u records parsed.\n", count);
1333     sortinternaldsbyhash(parsed_recs, count);
1334     sortinternalgroupfname(parsed_recs, count);
1335    
1336     if (! count)
1337     return;
1338    
1339     dupgroup = 0;
1340     i_group_min = 0;
1341     i_group_max = 0;
1342    
1343     do
1344     {
1345     //Advance i_group_max to the end of the group of duplicates.
1346     while ((i_group_max < (count - 1)) && (! strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
1347     {
1348     i_group_max++;
1349     }
1350    
1351     //If this is a group of duplicates.
1352     if (i_group_min != i_group_max)
1353     {
1354     //Print the findings.
1355     printf("Duplicate group %u:\n", dupgroup);
1356     for (ui = i_group_min; ui <= i_group_max; ui++)
1357     {
1358     printf("%s\n", parsed_recs[ui].fname);
1359     }
1360    
1361     dupgroup++;
1362    
1363     stdout_hline();
1364    
1365     //Count how many of the group of duplicates are within the supplied path.
1366     within_path = 0;
1367     for (ui = i_group_min; ui <= i_group_max; ui++)
1368     {
1369     if (is_path_member(parsed_recs[ui].fname, path))
1370     {
1371     within_path++;
1372     }
1373     }
1374    
1375     //We have to take different actions based on whether we do or don't have any within path.
1376     //If we don't have any, we may delete nothing.
1377     if (! within_path)
1378     {
1379     printf("None of these duplicates in path--taking no action.\n");
1380     //stdout_hline();
1381     }
1382     else
1383     {
1384     for (ui = i_group_min; ui <= i_group_max; ui++)
1385     {
1386     if (is_path_member(parsed_recs[ui].fname, path))
1387     {
1388     printf("Not deleting: %s\n", parsed_recs[ui].fname);
1389     }
1390     else
1391     {
1392     printf("Deleting : %s\n", parsed_recs[ui].fname);
1393     if (may_delete)
1394     {
1395     if (! unlink(parsed_recs[ui].fname))
1396     {
1397     printf(" File deleted (unlinked) successfully.\n");
1398     }
1399     else
1400     {
1401     printf(" Failure attempting to delete (unlink) file.\n");
1402     }
1403     }
1404     else
1405     {
1406     printf(" Dry run only.\n");
1407     }
1408     }
1409    
1410     //w_sleep(pause_time);
1411     }
1412     }
1413    
1414     stdout_hline();
1415     }
1416    
1417     //On to the next group.
1418     i_group_max++;
1419     i_group_min = i_group_max;
1420    
1421     } while (i_group_max < (count - 1));
1422     }
1423     //----------------------------------------------------------------------------------------------------
1424     int main(int argc, char* argv[])
1425     {
1426     stdout_hline();
1427     printf("Execution begins.\n");
1428     stdout_hline();
1429    
1430     if (argc == 1)
1431     {
1432     }
1433     else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
1434     {
1435     option_dups(argv[2]);
1436     }
1437     else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
1438     {
1439     option_filterdups(argv[2]);
1440     }
1441     else if ((argc == 3) && (strcmp(argv[1], "dedupnopath") == 0))
1442     {
1443     //option_filterdups(argv[2]);
1444     }
1445     else if ((argc == 3) && (strcmp(argv[1], "dryrunnopath") == 0))
1446     {
1447     //option_filterdups(argv[2]);
1448     }
1449     else if ((argc == 4) && (strcmp(argv[1], "dedup") == 0))
1450     {
1451     option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
1452     }
1453     else if ((argc == 4) && (strcmp(argv[1], "dryrun") == 0))
1454     {
1455     option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME/10.0);
1456     }
1457     else
1458     {
1459     printf("Unrecognized parameter form. Try \"dedup\".\n");
1460     }
1461    
1462     //w_sleep(-3 /* UNLINKPAUSETIME*/ );
1463    
1464     //stdout_hline();
1465     printf("Execution ends.\n");
1466     stdout_hline();
1467    
1468     return 0;
1469     }
1470     //----------------------------------------------------------------------------------------------------
1471    

Properties

Name Value
svn:eol-style native

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25