/[dtapublic]/projs/dtats/trunk/projs/2016/20161007_ddeedduupp/win/ddeedduupp/ddeedduupp.cpp
ViewVC logotype

Contents of /projs/dtats/trunk/projs/2016/20161007_ddeedduupp/win/ddeedduupp/ddeedduupp.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 310 - (show annotations) (download)
Wed Jan 1 23:15:08 2020 UTC (4 years, 5 months ago) by dashley
File size: 27071 byte(s)
Remove extra file.
Place file contents in new project for analysis.
1 // ddeedduupp.cpp : This file contains the 'main' function. Program execution begins and ends there.
2 //
3
4 #include <iostream>
5
//Program entry point.  Writes a fixed greeting to standard output and reports success to the
//caller.
int main()
{
    std::cout << "Hello World!\n";
    return 0;
}
10
11 // Run program: Ctrl + F5 or Debug > Start Without Debugging menu
12 // Debug program: F5 or Debug > Start Debugging menu
13
14 // Tips for Getting Started:
15 // 1. Use the Solution Explorer window to add/manage files
16 // 2. Use the Team Explorer window to connect to source control
17 // 3. Use the Output window to see build output and other messages
18 // 4. Use the Error List window to view errors
19 // 5. Go to Project > Add New Item to create new code files, or Project > Add Existing Item to add existing code files to the project
20 // 6. In the future, to open this project again, go to File > Open > Project and select the .sln file
21
22 #if 0
23 //----------------------------------------------------------------------------------------------------
24 //$Header$
25 //----------------------------------------------------------------------------------------------------
26 //qdedup.c
27 //----------------------------------------------------------------------------------------------------
28 //Quick and dirty program to eliminate duplicates from a file tree. A file containing the SHA512
29 //hashes of all the files to be considered must already exist, and must be regenerated each time the
30 //underlying files are deleted/added/modified, which means the file must be regenerated after each run
31 //of qdedup. (WARNING: IF YOU DO NOT REGENERATE THE FILE AFTER EACH RUN OF qdedup, YOU WILL
32 //PROBABLY DESTROY DATA. THE MECHANISM WOULD BE THAT THE SHA512 MANIFEST IMPLIES THAT DUPLICATES
33 //EXIST WHEN THEY NO LONGER DO, SO qdedup WILL ERRONEOUSLY DELETE THE LAST COPIES OF FILES.) The
34 //program will eliminate duplicates within a single specified directory or outside a single specified
35 //directory.
36 //
37 //This program will compile and run only on *nix systems and under Cygwin on Windows systems.
38 //----------------------------------------------------------------------------------------------------
39 //Copyright David T. Ashley (dashley@gmail.com), 2016.
40 //----------------------------------------------------------------------------------------------------
41 //Provided under the MIT LICENSE, reproduced immediately below.
42 //----------------------------------------------------------------------------------------------------
43 //Permission is hereby granted, free of charge, to any person obtaining a copy of
44 //this software and associated documentation files (the "Software"), to deal in the
45 //Software without restriction, including without limitation the rights to use,
46 //copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
47 //Software, and to permit persons to whom the Software is furnished to do so,
48 //subject to the following conditions:
49 //
50 //The above copyright notice and this permission notice shall be included in all
51 //copies or substantial portions of the Software.
52 //
53 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
54 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
55 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
56 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
57 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
58 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
59 //SOFTWARE.
60 //----------------------------------------------------------------------------------------------------
61 //All paths in the SHA512 file must be absolute or must be relative to the current working directory
62 //at the time this program is run.
63 //
64 //The recommended method to generate the SHA512 file is using the "-exec" option of the "find"
65 //command, i.e.
66 //
67 // find target_directory -type f -exec sha512sum {} \; >sha512sums.txt
68 //
69 //If any files are deleted by the program, a new SHA512 file must be generated before the program is
70 //run again to delete files. The reason for this restriction is that the program will never knowingly
71 //delete the last copy of a file. If the SHA512 file contains the digests of files that no longer
72 //exist, the program may unknowingly delete the last copies of files (because it believes based on
73 //the SHA512 file that other copies exist when in fact they do not).
74 //
75 //The SHA512 file does not need to be sorted (this program sorts it internally by hash before using it).
76 //
77 //This program is designed to compile and run under Cygwin or *nix only.
78 //
79 //Usage:
80 // qdedup
81 // Prints help information and exits.
82 // qdedup ndups <sha512file>
83 // Prints statistics about the number of duplicates in <sha512file>.
84 // qdedup filterdups <sha512file>
85 // Analyzes duplicates and prints the filenames of groups of duplicates. The output is designed
86 // for hand analysis so that insight can be gained into what duplicates exist and where they
87 // are located.
88 // qdedup dedup_preserve_inside <sha512file> <path>
89 // For each group of duplicates that exists, preserves the duplicates that exist within path
90 // and removes all others. If no copies of the duplicate exist within path, no copies of the
91 // duplicate will be removed.
92 // qdedup dryrun_preserve_inside <sha512file> <path>
93 // Exactly like "dedup_preserve_inside", except that no files will be deleted. Text will be
94 // output to explain what would be deleted by "dedup_preserve_inside".
95 // qdedup dedup_nopath <sha512file>
96 // For each group of duplicates that exists, preserves only the first (the one with the lowest
97 // sort-order filename).
98 // qdedup dryrun_nopath <sha512file>
99 // Exactly like "dedup_nopath", except that no files will be deleted. Text will be
100 // output to explain what would be deleted by "dedup_nopath".
101 // qdedup dedup_preserve_outside <sha512file> <path>
102 // For each group of duplicates that exists, deletes duplicates only from within the specified
103 // path. If any duplicates do not have at least one copy within <path> no instances of the
104 // duplicate are deleted.
105 // qdedup dryrun_preserve_outside <sha512file> <path>
106 // Exactly like "dedup_preserve_outside", except that no files will be deleted. Text will be
107 // output to explain what would be deleted by "dedup_preserve_outside".
108 //----------------------------------------------------------------------------------------------------
109 #include <math.h>
110 #include <stdio.h>
111 #include <stdlib.h>
112 #include <string.h>
113 #include <time.h>
114 #include <unistd.h>
115 //----------------------------------------------------------------------------------------------------
//Number of printable characters in a line.
#define LINELEN (78)

//The maximum number of characters that may be in a line of the SHA512 input file.  This count
//includes the \0 terminator, so only this value minus 1 characters may be in a line.
#define MAXLINELEN (2000)

//Number of seconds to pause between file unlinks (deletions).  This is designed to give the user
//time to abort the program if desired before catastrophic quantities of files are deleted.
#define UNLINKPAUSETIME (0.1)
123 //----------------------------------------------------------------------------------------------------
//Record for one entry of the SHA512 file: the character representation of an SHA512 hash, plus
//the filename that was specified with it.
typedef struct
{
    //512/4 = 128 hexadecimal characters for the hash, plus 1 character for the zero terminator.
    char hash[128 + 1];
    //Filename as specified in the file, allocated via the malloc() family.
    char* fname;
} tFileHashRecord;
133 //----------------------------------------------------------------------------------------------------
134 //----------------------------------------------------------------------------------------------------
135 //----- CHARACTER CLASSIFICATION FUNCTIONS ---------------------------------------------------------
136 //----------------------------------------------------------------------------------------------------
137 //----------------------------------------------------------------------------------------------------
//Returns 1 if c may appear in the hash portion of a line (a decimal digit, or one of the
//lower-case letters 'a' through 'f'), and 0 otherwise.  Upper-case letters are not accepted.
int is_valid_hash_char(char c)
{
    if ((c >= '0') && (c <= '9'))
    {
        return 1;
    }

    if ((c >= 'a') && (c <= 'f'))
    {
        return 1;
    }

    return 0;
}
166 //----------------------------------------------------------------------------------------------------
//Returns 1 if c is one of the two characters that can make up a newline sequence (character
//code 13 or character code 10), and 0 otherwise.
int is_newline_sequence_char(char c)
{
    return ((c == 10) || (c == 13)) ? 1 : 0;
}
181
182 //----------------------------------------------------------------------------------------------------
183 //----------------------------------------------------------------------------------------------------
184 //----- FORMATTED OUTPUT FUNCTIONS -----------------------------------------------------------------
185 //----------------------------------------------------------------------------------------------------
186 //----------------------------------------------------------------------------------------------------
//Writes the character c to the stream s exactly n times.  Nothing is written when n is 0.
//
void stream_rep_char(FILE* s, char c, unsigned n)
{
    unsigned written;

    for (written = 0; written < n; written++)
    {
        fputc(c, s);
    }
}
196 //----------------------------------------------------------------------------------------------------
197 //Prints a horizontal line to a stream, including the newline.
198 //
199 void stream_hline(FILE* s)
200 {
201 stream_rep_char(s, '-', LINELEN);
202 fprintf(s, "\n");
203 }
204 //----------------------------------------------------------------------------------------------------
//Writes a horizontal rule of LINELEN '-' characters to stdout, followed by a newline.
//
void stdout_hline(void)
{
    stream_hline(stdout);
}
212 //----------------------------------------------------------------------------------------------------
213 //----------------------------------------------------------------------------------------------------
214 //----- FATAL ERROR FUNCTIONS ----------------------------------------------------------------------
215 //----------------------------------------------------------------------------------------------------
216 //----------------------------------------------------------------------------------------------------
//Reports an unrecoverable error on stdout, framed by horizontal rules, and then terminates the
//program with exit status 1.  This function does not return.
//
//desc : description of the error.
//file : name of the source file reporting the error (callers pass __FILE__).
//line : source line reporting the error (callers pass __LINE__).
//
void fatal(const char* desc, const char* file, unsigned line)
{
    stdout_hline();
    fprintf(stdout, "Fatal error: %s\n", desc);
    fprintf(stdout, "Source file: %s\n", file);
    fprintf(stdout, "Line : %u\n", line);
    stdout_hline();

    exit(1);
}
228 //----------------------------------------------------------------------------------------------------
229 //----------------------------------------------------------------------------------------------------
230 //----- MEMORY ALLOCATION WRAPPERS -----------------------------------------------------------------
231 //----------------------------------------------------------------------------------------------------
232 //----------------------------------------------------------------------------------------------------
//malloc() wrapper that treats every failure as fatal.
//
//nbytes : number of bytes requested.  A request for 0 bytes is a fatal error.
//
//Returns a pointer to nbytes of zero-filled memory.  If the request is for 0 bytes or malloc()
//fails, fatal() is called, which terminates the program, so the returned pointer is never NULL.
void* w_malloc(size_t nbytes)
{
    void* rv;

    if (!nbytes)
    {
        fatal("Memory allocation request for 0 bytes.", __FILE__, __LINE__);
    }

    rv = malloc(nbytes);

    if (!rv)
    {
        fatal("Out of memory in malloc() request.", __FILE__, __LINE__);
    }

    //Zero out, just for consistency.
    memset(rv, 0, nbytes);

    //The block must be handed back explicitly:  flowing off the end of a function that returns
    //a value leaves the caller with an indeterminate pointer.
    return rv;
}
253 //----------------------------------------------------------------------------------------------------
//realloc() wrapper that treats every failure as fatal.
//
//p : pointer to the existing allocation.  A NULL pointer is a fatal error.
//n : new size in bytes.  A request for 0 bytes is a fatal error.
//
//Returns the pointer produced by realloc(), which the caller must use in place of p.  If either
//argument is rejected or realloc() fails, fatal() is called, which terminates the program, so
//the returned pointer is never NULL.
void* w_realloc(void* p, size_t n)
{
    void* rv;

    if (!n)
    {
        fatal("Memory reallocation request for 0 bytes.", __FILE__, __LINE__);
    }

    if (!p)
    {
        fatal("Memory reallocation request with NULL pointer.", __FILE__, __LINE__);
    }

    rv = realloc(p, n);

    if (!rv)
    {
        fatal("Out of memory in realloc() request.", __FILE__, __LINE__);
    }

    //The block must be handed back explicitly:  flowing off the end of a function that returns
    //a value leaves the caller with an indeterminate pointer.
    return rv;
}
276 //----------------------------------------------------------------------------------------------------
277 //----------------------------------------------------------------------------------------------------
278 //----- SLEEP FUNCTIONS ----------------------------------------------------------------------------
279 //----------------------------------------------------------------------------------------------------
280 //----------------------------------------------------------------------------------------------------
//Suspends execution for the requested time, in seconds (fractions allowed).
//
//seconds : time to sleep.  A negative value, or a value greater than 3600, is a fatal error.
//
//The return value of nanosleep() is not examined.
void w_sleep(double seconds)
{
    struct timespec req;
    double whole_seconds;

    if (seconds < 0)
    {
        fatal("Sleep for negative time request.", __FILE__, __LINE__);
    }
    else if (seconds > 3600)
    {
        fatal("Sleep for too long request.", __FILE__, __LINE__);
    }

    //Split the request into whole seconds and the remaining fraction expressed in nanoseconds.
    whole_seconds = floor(seconds);
    req.tv_sec = whole_seconds;
    req.tv_nsec = (seconds - whole_seconds) * 1E9;

    nanosleep(&req, NULL);
}
300 //----------------------------------------------------------------------------------------------------
301 //----------------------------------------------------------------------------------------------------
302 //----- SHA512 FIELD READ FUNCTIONS ----------------------------------------------------------------
303 //----------------------------------------------------------------------------------------------------
304 //----------------------------------------------------------------------------------------------------
305 //These functions read in an individual field of a standard SHA512 file generated using application
306 //of the standard sha512sum program.
307 //
308 //*rcode = 1, success.
309 // 0, legal end of file, record assigned.
310 void get_sha512file_line(FILE* s, int* rcode, tFileHashRecord* hash_rec)
311 {
312 unsigned bidx;
313 unsigned nchars;
314 int ic;
315 int exitflag;
316 int eoffound;
317 int eolfound;
318 char c;
319 char buf[MAXLINELEN];
320
321 //Zero out the buffer. This handles string termination automatically.
322 memset(buf, 0, sizeof(buf));
323
324 //Read characters into the buffer until either hit EOF, newline, or can't
325 //fill the buffer any longer.
326 eoffound = 0;
327 eolfound = 0;
328 exitflag = 0;
329 bidx = 0;
330 do
331 {
332 ic = fgetc(s);
333 c = ic;
334
335 if (ic == EOF)
336 {
337 eoffound = 1;
338 eolfound = 0;
339 nchars = bidx;
340 exitflag = 1;
341 }
342 else if (is_newline_sequence_char(c))
343 {
344 eoffound = 0;
345 eolfound = 1;
346 nchars = bidx;
347 exitflag = 1;
348 }
349 else if (bidx >= (MAXLINELEN - 1))
350 {
351 fatal("SHA512 hash file line too long to parse.", __FILE__, __LINE__);
352 }
353 else
354 {
355 buf[bidx] = c;
356 bidx++;
357 exitflag = 0;
358 }
359 } while (!exitflag);
360
361 //If we encountered a newline, inch past it. We may encounter an EOF.
362 if (eolfound)
363 {
364 exitflag = 0;
365 do
366 {
367 ic = fgetc(s);
368 c = ic;
369
370 if (ic == EOF)
371 {
372 eoffound = 1;
373 eolfound = 0;
374 exitflag = 1;
375 }
376 else if (is_newline_sequence_char(c))
377 {
378 exitflag = 0;
379 }
380 else
381 {
382 //We hit the next line. Put the character back.
383 eoffound = 0;
384 eolfound = 1;
385 ungetc(ic, s);
386 exitflag = 1;
387 }
388 } while (!exitflag);
389 }
390
391 //For better or worse, we have a \0-terminated line in the buffer.
392 //
393 //Zero the caller's area. This takes care of the hash terminator as well.
394 memset(hash_rec, 0, sizeof(*hash_rec));
395
396 //Ensure that we have at least 128 characters, and they are all hex characters.
397 //Otherwise, we can't proceed.
398 if (nchars < 128)
399 {
400 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
401 }
402 else
403 {
404 for (bidx = 0; bidx < 128; bidx++)
405 {
406 if (!is_valid_hash_char(buf[bidx]))
407 {
408 fatal("Character in SHA512 hash portion of line inconsistent with hash.", __FILE__, __LINE__);
409 }
410 }
411 }
412
413 //The 129th and 130'th character must be present and must be a space and asterisk, respectively.
414 if (nchars < 130)
415 {
416 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
417 }
418 else if (buf[128] != ' ')
419 {
420 fatal("129th hash line character must be \" \".", __FILE__, __LINE__);
421 }
422 else if (buf[129] != '*')
423 {
424 fatal("130th hash line character must be \"*\".", __FILE__, __LINE__);
425 }
426 // else if (buf[129] != ' ')
427 // {
428 // //130th character is ' '. Need to figure out why sometimes space and sometimes '*'.
429 // fatal("130th hash line character must be \" \".", __FILE__, __LINE__);
430 // }
431
432 //There must be a 131'st character. Beyond that, we can't qualify, because filenames may
433 //have odd characters and may be of any length.
434 if (nchars < 131)
435 {
436 fatal("SHA512 hash file line too short.", __FILE__, __LINE__);
437 }
438
439 //Copy the hash to the caller's area. The terminator has already been inserted.
440 memcpy(&(hash_rec->hash[0]), buf, 128);
441
442 //Allocate space for the filename.
443 hash_rec->fname = w_malloc(strlen(buf + 130) + 1);
444
445 //Make the copy.
446 strcpy(hash_rec->fname, buf + 130);
447
448 if (eoffound)
449 * rcode = 0;
450 else
451 *rcode = 1;
452 }
453 //----------------------------------------------------------------------------------------------------
454 void parseinputfile(tFileHashRecord** parsed_recs, unsigned* count, char* fname)
455 {
456 FILE* s;
457 int rcode;
458
459 //Try to open the file for reading. Inability is a failure.
460 s = fopen(fname, "r");
461 if (!s)
462 {
463 fatal("Hash file open failure.", __FILE__, __LINE__);
464 }
465
466 //Start off with a count of 0 and a NULL pointer.
467 *count = 0;
468 *parsed_recs = NULL;
469
470 do
471 {
472 //For the first time, allocate space for one record. Beyond that,
473 //expand it.
474 if (!*parsed_recs)
475 {
476 *parsed_recs = w_malloc(sizeof(tFileHashRecord));
477 }
478 else
479 {
480 *parsed_recs = w_realloc(*parsed_recs, (size_t)((*count + 1)) * sizeof(tFileHashRecord));
481 }
482
483 //Parse and fill in the space.
484 get_sha512file_line(s, &rcode, (*parsed_recs) + (*count));
485
486 //We now have one more.
487 (*count)++;
488 } while (rcode == 1);
489
490 //Try to close the file. Inability is a failure.
491 if (fclose(s))
492 {
493 fatal("Hash file close failure.", __FILE__, __LINE__);
494 }
495 }
496 //----------------------------------------------------------------------------------------------------
497 int sortcmpascendinghash(const void* p0_in, const void* p1_in)
498 {
499 const tFileHashRecord* p0, * p1;
500
501 p0 = p0_in;
502 p1 = p1_in;
503
504 return(strcmp(p0->hash, p1->hash));
505 }
506
507 //----------------------------------------------------------------------------------------------------
508 void sortinternaldsbyhash(tFileHashRecord* parsed_recs, unsigned count)
509 {
510 qsort(parsed_recs, count, sizeof(tFileHashRecord), sortcmpascendinghash);
511 }
512 //----------------------------------------------------------------------------------------------------
513 int sortcmpascendingfname(const void* p0_in, const void* p1_in)
514 {
515 const tFileHashRecord* p0, * p1;
516
517 p0 = p0_in;
518 p1 = p1_in;
519
520 return(strcmp(p0->fname, p1->fname));
521 }
522 //----------------------------------------------------------------------------------------------------
523 //This sort has to be run after the hash sort. Within groups of identical hashes, it sorts by
524 //ascending filename.
525 void sortinternalgroupfname(tFileHashRecord* parsed_recs, unsigned count)
526 {
527 unsigned ui;
528 unsigned i_group_min, i_group_max;
529
530 if (!count)
531 return;
532
533 i_group_min = 0;
534 i_group_max = 0;
535
536 do
537 {
538 //Advance i_group_max to the end of the group of duplicates.
539 while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
540 {
541 i_group_max++;
542 }
543
544 if (i_group_min != i_group_max)
545 {
546 //Sort the internal group.
547 qsort(parsed_recs + i_group_min,
548 i_group_max - i_group_min + 1,
549 sizeof(tFileHashRecord),
550 sortcmpascendingfname);
551 }
552
553 //On to the next group.
554 i_group_max++;
555 i_group_min = i_group_max;
556
557 } while (i_group_max < (count - 1));
558 }
559 //----------------------------------------------------------------------------------------------------
560 void printsinglerecord(tFileHashRecord* rec, unsigned elno)
561 {
562 printf("[%9u]\n", elno);
563 printf("Hash : %s\n", rec->hash);
564 printf("Filename : %s\n", rec->fname);
565 stdout_hline();
566 }
567 //----------------------------------------------------------------------------------------------------
568 void printinternalds(tFileHashRecord* parsed_recs, unsigned count)
569 {
570 unsigned i;
571
572 for (i = 0; i < count; i++)
573 {
574 printsinglerecord(parsed_recs + i, i);
575 }
576 }
577 //----------------------------------------------------------------------------------------------------
578 void gather_dup_stats(tFileHashRecord* parsed_recs, unsigned count, unsigned* out_num_dups, unsigned* out_cumulative_dups)
579 {
580 unsigned i_group_min, i_group_max;
581
582 *out_num_dups = 0;
583 *out_cumulative_dups = 0;
584
585 if (!count)
586 return;
587
588 i_group_min = 0;
589 i_group_max = 0;
590
591 do
592 {
593 //Advance i_group_max to the end of the group of duplicates.
594 while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
595 {
596 i_group_max++;
597 }
598
599 //Log the findings.
600 if (i_group_min != i_group_max)
601 {
602 (*out_num_dups)++;
603 (*out_cumulative_dups) += (i_group_max - i_group_min + 1);
604 }
605
606 //On to the next group.
607 i_group_max++;
608 i_group_min = i_group_max;
609
610 } while (i_group_max < (count - 1));
611 }
612 //----------------------------------------------------------------------------------------------------
613 void option_dups(char* fname)
614 {
615 tFileHashRecord* parsed_recs;
616 unsigned count, num_dups, cumulative_dups;
617
618 parseinputfile(&parsed_recs, &count, fname);
619 //printf("%u records parsed.\n", count);
620 sortinternaldsbyhash(parsed_recs, count);
621 sortinternalgroupfname(parsed_recs, count);
622 printinternalds(parsed_recs, count);
623 stdout_hline();
624 gather_dup_stats(parsed_recs, count, &num_dups, &cumulative_dups);
625 printf("Number of duplicated files : %u\n", num_dups);
626 if (num_dups)
627 {
628 printf("Average number of duplicates: %.2f\n", (double)cumulative_dups / (double)num_dups);
629 }
630 }
631 //----------------------------------------------------------------------------------------------------
632 void option_filterdups(char* fname)
633 {
634 tFileHashRecord* parsed_recs;
635 unsigned dupgroup;
636 unsigned count;
637 unsigned ui;
638 unsigned i_group_min, i_group_max;
639
640 parseinputfile(&parsed_recs, &count, fname);
641 //printf("%u records parsed.\n", count);
642 sortinternaldsbyhash(parsed_recs, count);
643 sortinternalgroupfname(parsed_recs, count);
644
645 if (!count)
646 return;
647
648 dupgroup = 0;
649 i_group_min = 0;
650 i_group_max = 0;
651
652 do
653 {
654 //Advance i_group_max to the end of the group of duplicates.
655 while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
656 {
657 i_group_max++;
658 }
659
660 //Print the findings.
661 if (i_group_min != i_group_max)
662 {
663 printf("Duplicate group %u:\n", dupgroup);
664 for (ui = i_group_min; ui <= i_group_max; ui++)
665 {
666 printf("%s\n", parsed_recs[ui].fname);
667 }
668
669 dupgroup++;
670
671 stdout_hline();
672 }
673
674 //On to the next group.
675 i_group_max++;
676 i_group_min = i_group_max;
677
678 } while (i_group_max < (count - 1));
679 }
680 //----------------------------------------------------------------------------------------------------
//Returns 1 if the filename is within the specified path, or 0 otherwise.
//
//The test is a byte-wise prefix comparison:  fname is within path when it is strictly longer
//than path and begins with exactly the characters of path.  No normalization of either string
//is performed.
//
//A zero-length fname, a zero-length path, or a path that does not end with a forward slash is
//a fatal error.
int is_path_member(const char* fname, const char* path)
{
    const size_t fname_len = strlen(fname);
    const size_t path_len = strlen(path);

    if (fname_len == 0)
    {
        fatal("Zero-length filename.", __FILE__, __LINE__);
    }
    else if (path_len == 0)
    {
        fatal("Zero-length path.", __FILE__, __LINE__);
    }
    else if (path[path_len - 1] != '/')
    {
        fatal("Paths must canonically end with forward slash character.", __FILE__, __LINE__);
    }
    else if (fname_len <= path_len)
    {
        //Can't be in the path because the filename is not longer than the path name.
        return 0;
    }
    else
    {
        return (memcmp(fname, path, path_len) == 0) ? 1 : 0;
    }
}
710 //----------------------------------------------------------------------------------------------------
711 void option_dedup(char* fname, char* path, int may_delete, double pause_time)
712 {
713 tFileHashRecord* parsed_recs;
714 unsigned dupgroup;
715 unsigned count;
716 unsigned ui;
717 unsigned within_path;
718 unsigned i_group_min, i_group_max;
719
720 parseinputfile(&parsed_recs, &count, fname);
721 //printf("%u records parsed.\n", count);
722 sortinternaldsbyhash(parsed_recs, count);
723 sortinternalgroupfname(parsed_recs, count);
724
725 if (!count)
726 return;
727
728 dupgroup = 0;
729 i_group_min = 0;
730 i_group_max = 0;
731
732 do
733 {
734 //Advance i_group_max to the end of the group of duplicates.
735 while ((i_group_max < (count - 1)) && (!strcmp(parsed_recs[i_group_min].hash, parsed_recs[i_group_max + 1].hash)))
736 {
737 i_group_max++;
738 }
739
740 //If this is a group of duplicates.
741 if (i_group_min != i_group_max)
742 {
743 //Print the findings.
744 printf("Duplicate group %u:\n", dupgroup);
745 for (ui = i_group_min; ui <= i_group_max; ui++)
746 {
747 printf("%s\n", parsed_recs[ui].fname);
748 }
749
750 dupgroup++;
751
752 stdout_hline();
753
754 //Count how many of the group of duplicates are within the supplied path.
755 within_path = 0;
756 for (ui = i_group_min; ui <= i_group_max; ui++)
757 {
758 if (is_path_member(parsed_recs[ui].fname, path))
759 {
760 within_path++;
761 }
762 }
763
764 //We have to take different actions based on whether we do or don't have any within path.
765 //If we don't have any, we may delete nothing.
766 if (!within_path)
767 {
768 printf("None of these duplicates in path--taking no action.\n");
769 //stdout_hline();
770 }
771 else
772 {
773 for (ui = i_group_min; ui <= i_group_max; ui++)
774 {
775 if (is_path_member(parsed_recs[ui].fname, path))
776 {
777 printf("Not deleting: %s\n", parsed_recs[ui].fname);
778 }
779 else
780 {
781 printf("Deleting : %s\n", parsed_recs[ui].fname);
782 if (may_delete)
783 {
784 if (!unlink(parsed_recs[ui].fname))
785 {
786 printf(" File deleted (unlinked) successfully.\n");
787 }
788 else
789 {
790 printf(" Failure attempting to delete (unlink) file.\n");
791 }
792 }
793 else
794 {
795 printf(" Dry run only.\n");
796 }
797 }
798
799 //w_sleep(pause_time);
800 }
801 }
802
803 stdout_hline();
804 }
805
806 //On to the next group.
807 i_group_max++;
808 i_group_min = i_group_max;
809
810 } while (i_group_max < (count - 1));
811 }
812 //----------------------------------------------------------------------------------------------------
813 int main(int argc, char* argv[])
814 {
815 stdout_hline();
816 printf("Execution begins.\n");
817 stdout_hline();
818
819 if (argc == 1)
820 {
821 }
822 else if ((argc == 3) && (strcmp(argv[1], "ndups") == 0))
823 {
824 option_dups(argv[2]);
825 }
826 else if ((argc == 3) && (strcmp(argv[1], "filterdups") == 0))
827 {
828 option_filterdups(argv[2]);
829 }
830 else if ((argc == 3) && (strcmp(argv[1], "dedup_nopath") == 0))
831 {
832 //option_filterdups(argv[2]);
833 }
834 else if ((argc == 3) && (strcmp(argv[1], "dryrun_nopath") == 0))
835 {
836 //option_filterdups(argv[2]);
837 }
838 else if ((argc == 4) && (strcmp(argv[1], "dedup_preserve_inside") == 0))
839 {
840 option_dedup(argv[2], argv[3], 1, UNLINKPAUSETIME);
841 }
842 else if ((argc == 4) && (strcmp(argv[1], "dryrun_preserve_inside") == 0))
843 {
844 option_dedup(argv[2], argv[3], 0, UNLINKPAUSETIME / 10.0);
845 }
846 else
847 {
848 printf("Unrecognized parameter form. Try \"dedup\".\n");
849 }
850
851 //w_sleep(-3 /* UNLINKPAUSETIME*/ );
852
853 //stdout_hline();
854 printf("Execution ends.\n");
855 stdout_hline();
856
857 return 0;
858 }
859 //----------------------------------------------------------------------------------------------------
860 #endif

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision URL Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25