1 |
//$Header$ |
2 |
//{211fc600-db2e-4703-bf40-968a2f063b13} |
3 |
//------------------------------------------------------------------------------------------------- |
4 |
//Copyright (c) 2018, David T. Ashley |
5 |
// |
6 |
//This file is part of "ets_dedup", a program for eliminating duplicate files in a subdirectory |
7 |
//tree. |
8 |
// |
9 |
//This source code and any program in which it is compiled/used is licensed under the MIT License, |
10 |
//reproduced below. |
11 |
// |
12 |
//Permission is hereby granted, free of charge, to any person obtaining a copy of |
13 |
//this software and associated documentation files (the "Software"), to deal in the |
14 |
//Software without restriction, including without limitation the rights to use, |
15 |
//copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the |
16 |
//Software, and to permit persons to whom the Software is furnished to do so, |
17 |
//subject to the following conditions: |
18 |
// |
19 |
//The above copyright notice and this permission notice shall be included in all |
20 |
//copies or substantial portions of the Software. |
21 |
// |
22 |
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
23 |
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
24 |
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
25 |
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
26 |
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
27 |
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
28 |
//SOFTWARE. |
29 |
//------------------------------------------------------------------------------------------------- |
30 |
#include <stdio.h> |
31 |
|
32 |
//Short, general description of the program, stored as a table of text lines.
//
//Each element is one line of text with no trailing newline character.
//
//NOTE: every string literal must be followed by a comma. Adjacent string
//literals with no comma between them are silently concatenated by the compiler
//into a single element, which fuses two lines of text with no separator and
//reduces the number of elements in the table.
const char *ets_dedup_description[] =
{
    "ets_dedup (mnemonic: DE-DUPlicate) is a program for identifying and",
    "eliminating duplicate files (by any name) at any depth in a subdirectory and",
    "its children. The most common application for the program would be the",
    "reduction of personal clutter, i.e. duplicate photos and downloads.",
};
39 |
|
40 |
//Full usage instructions for the program, stored as a table of text lines.
//
//Each element is one line of text with no trailing newline character.  An
//empty string ("") represents a blank line separating sections.  The table is
//organized into the sections "Usage", "Options", "Limitations", and
//"Technical Notes".
//
//NOTE: every string literal must be followed by a comma. Adjacent string
//literals with no comma between them are silently concatenated by the compiler
//into a single element, which fuses two lines of text (for example, a heading
//underline with the text that follows it) and reduces the number of elements
//in the table.
//
//The final entry of "Technical Notes" is an unfinished sentence in the
//original text; it is preserved as-is until the intended wording is known.
const char *ets_dedup_instructions[] =
{
    "Usage",
    "-----",
    "ets_dedup [-option_1 [ ... [-option_n]]] [--] [pref_dir_1 [ ... [pref_dir_n]]]",
    " If no options are provided, emits full documentation to stdout. If options",
    " are provided, analyzes and optionally deletes duplicate files in the",
    " current working directory and all its children. With large sets of files",
    " This program can take a long time to run (hours), because it calculates",
    " the SHA512 digest of every file.",
    "",
    " ets_dedup is a dangerous program in that it can destroy information (which",
    " file is in which directory is information, and it is possible to destroy",
    " information without deleting the last of a set of duplicate files).",
    " However, ets_dedup is safe in the sense that it will never delete the last",
    " of a set of identical files (this cannot be done using this program,",
    " automatically or manually).",
    "",
    "Options",
    "-------",
    "-report",
    " Analyzes the current working directory and all its children for duplicates,",
    " and writes a full report to the console. The report includes which files",
    " are duplicates, and approximately how much storage space would be saved by",
    " eliminating all duplicates and by eliminating duplicates of individual",
    " files and of subdirectories. The report is voluminous and is typically",
    " redirected to a file.",
    "-dedup_full_auto",
    " Deletes all duplicate files, leaving only one copy (by any name or",
    " extension) of any file. If duplicates are in the same directory, the",
    " first one in alphabetical order is retained. If duplicates are in different",
    " directories, a non-deterministic algorithm is used that tends to leave",
    " larger directories intact while consuming smaller directories.",
    "-dedup_auto_dir_pri",
    " Deletes all duplicate files, leaving only one copy (by any name or",
    " extension). However, in the selection of which duplicates to delete, the",
    " copy in pref_dir_1 is given preference to remain over the copy in",
    " pref_dir_2, ..., over the copy in pref_dir_n, and finally over files in",
    " subdirectories not covered by any of the specified directories. If",
    " multiple copies of a file exist in the highest-priority preferred directory",
    " specified, they are all retained. If there are duplicates that exist only",
    " outside the set of specified preferred directories, none are deleted.",
    "-dedup_auto_dir_equal",
    " The specified directories are given priority over all directories not",
    " specified (this creates two equivalence classes--the directories specified",
    " and the directories not specified). Duplicates that exist both in at least",
    " one of the specified directories and outside the set of specified",
    " directories have the outside copies deleted. No files within the set of",
    " specified directories are deleted. If a file has copies only outside, no",
    " copies are deleted.",
    "-dedup_manual_interactive",
    " Performs a full analysis, then allows interactive manual operations. The",
    " operations involve descending into and ascending out of directories, and",
    " setting a given directory or file as authoritative (meaning all external",
    " copies will be deleted) or non-authoritative (meaning that duplicates",
    " within the non-authoritative object are not retained).",
    "-dry_run",
    " Provides all information about what would have been deleted, but deletes no",
    " files. This option can be useful for ensuring that the behavior of the",
    " program will be acceptable.",
    "",
    "Limitations",
    "-----------",
    " ( 1) Unicode in path names supplied on the command line and in file and",
    " directory names is not supported.",
    " ( 2) Unicode in file and directory names may or may not be supported.",
    " This depends on technical details of Linux/Unix and Windows that",
    " are too voluminous to include here.",
    " ( 3) The program rebuilds its internal data structures each time it is",
    " run (which involves calculating the SHA512 digest of every file in",
    " the current working directory and its children). This is a very",
    " time-consuming operation. The program does not save any information",
    " between invocations.",
    " ( 4) The program builds all data structures in memory, and so is limited",
    " by the amount of usable memory in the computer system. A reasonable",
    " estimate of memory consumption might be 250 bytes per file to be",
    " analyzed (100 bytes for the path name, 128 bytes for the SHA512",
    " digest, and 22 bytes for other overhead). Assuming 1GB of usable",
    " memory, this gives an upper limit of around 4 million files.",
    " This suggests that the program would be usable for most de-duplication",
    " tasks.",
    " ( 5) The program does not provide information about near duplicates.",
    " The program processes files only in terms of same or different.",
    "",
    "Technical Notes",
    "---------------",
    " ( 1) Although the probability of two files with different contents having",
    " the same SHA512 digest is astronomically small (a hash collision has",
    " never been found), the program handles this case by ",

};
131 |
|
132 |
|
133 |
//Program entry point in C form.
//
//At present this is a stub: it writes a start marker and an end marker to
//stdout and reports success. Neither command-line parameter is examined.
//
//Parameters:
//   argc   Argument count (currently unused).
//   argv   Argument vector (currently unused).
//
//Returns:
//   Always 0.
int c_main(int argc, char **argv)
{
    //The arguments are accepted for interface reasons only.
    (void)argc;
    (void)argv;

    fputs("Execution begins.\n", stdout);
    fputs("Execution ends.\n", stdout);

    return 0;
}