From ed993cd7c1d87215a93df5e262e4fba3ce57c442 Mon Sep 17 00:00:00 2001
From: jozan <jozan@noemail.net>
Date: Thu, 12 Nov 2020 18:28:47 +0000
Subject: In progress

FossilOrigin-Name: 67a34a272b1dbb5370844efe2378b87c51c0e56c856cf7554993367bab6bfa47
---
 src/c_opts.c    |  7 +++++++
 src/c_opts.h    |  4 ++--
 src/c_subst.c   | 55 +++++++++++++++++++++++++------------------------------
 src/c_subst.h   |  2 +-
 src/c_unixize.c |  4 +++-
 src/c_unixize.h |  1 +
 src/u_utils.c   | 24 +++++++++++++++++++++++-
 src/u_utils.h   |  3 ++-
 8 files changed, 64 insertions(+), 36 deletions(-)

(limited to 'src')

diff --git a/src/c_opts.c b/src/c_opts.c
index f129cbf..fedc26a 100644
--- a/src/c_opts.c
+++ b/src/c_opts.c
@@ -103,6 +103,9 @@ c_recursive_parse
 		else if ((*ptr)[0] == 'i') {
 			opts->confirm = TRUE;
 		}
+		else if ((*ptr)[0] == 'k') {
+			opts->preserve = TRUE;
+		}
 		else if ((*ptr)[0] == 'n') {
 			opts->hyphen = TRUE;
 		}
@@ -139,6 +142,7 @@ c_get_opts
 	opts->confirm = FALSE;
 	opts->hidden = FALSE;
 	opts->hyphen = FALSE;
+	opts->preserve = FALSE;
 	opts->pretend = FALSE;
 	opts->recursive = FALSE;
 	opts->verbose = FALSE;
@@ -157,6 +161,9 @@ c_get_opts
 		else if (opt == 'i') {
 			opts->confirm = TRUE;
 		}
+		else if (opt == 'k') {
+			opts->preserve = TRUE;
+		}
 		else if (opt == 'n') {
 			opts->hyphen = TRUE;
 		}
diff --git a/src/c_opts.h b/src/c_opts.h
index 6587f51..f53d09c 100644
--- a/src/c_opts.h
+++ b/src/c_opts.h
@@ -48,10 +48,10 @@
 
 #include "c_unixize.h"
 
-#define C_OPTS				"ahinpRve:"
+#define C_OPTS				"ahiknpRve:"
 #define C_RECURSIVE_CHAR	'r'
 #define C_USAGE_FMT			\
-	"usage: unixize [-ahinpRv] [-e version] [directory]\n"
+	"usage: unixize [-ahiknpRv] [-e version] [directory]\n"
 #define C_C_OPT_FMT			\
 	"unixize: unsupported -e value '%s' (must be always 0, 1 or 2)\n"
 
diff --git a/src/c_subst.c b/src/c_subst.c
index 4b410ab..116336c 100644
--- a/src/c_subst.c
+++ b/src/c_subst.c
@@ -117,7 +117,8 @@ c_ext_subst
 static void
 c_classic_subst
 (char			filename[],
- const bool_t	hyphen)
+ const bool_t	hyphen,
+ const bool_t	preserve)
 {
 	char *p;
 	const char sep   = (hyphen == FALSE) ? ('_') : ('-');
@@ -125,38 +126,41 @@ c_classic_subst
 
 	p = (char*)filename;
 	while (*p != 0x00) {
-		if (*p == c_sep) {
+		if (*p == c_sep && preserve == FALSE) {
 			*p = sep;
-			c_classic_subst(filename, hyphen);
+			c_classic_subst(filename, hyphen, preserve);
 		}
 		if (*p == sep && *(p + 1) == sep) {
 			memmove(p, p + 1, (strlen(p + 1) + 1) * sizeof(char));
-			c_classic_subst(filename, hyphen);
+			c_classic_subst(filename, hyphen, preserve);
 		}
 		if (*p == '.' && *(p + 1) == '.') {
 			memmove(p, p + 1, (strlen(p + 1) + 1) * sizeof(char));
-			c_classic_subst(filename, hyphen);
+			c_classic_subst(filename, hyphen, preserve);
 		}
 		if (*p == ' ') {
 			*p = sep;
-			c_classic_subst(filename, hyphen);
+			c_classic_subst(filename, hyphen, preserve);
 		}
 		if (
 			isalnum(*p) == 0 &&
 			u_ischarset(*p, C_CHARSET_VALID) == FALSE
 		) {
 			memmove(p, p + 1, (strlen(p + 1) + 1) * sizeof(char));
-			c_classic_subst(filename, hyphen);
+			c_classic_subst(filename, hyphen, preserve);
 		}
 		p++;
 	}
 }
 
 static void
-c_num_prefix_subst(char filename[])
+c_num_prefix_subst
+(char			filename[],
+ const bool_t	hyphen)
 {
 	char *p;
 	char *p_probe;
+	const char sep = (hyphen == FALSE) ? ('_') : ('-');
 
 	p = filename;
 	while (*p != 0x00) {
@@ -170,7 +174,7 @@ c_num_prefix_subst(char filename[])
 					p_probe++;
 				}
 				if (*p_probe != 0x00) {
-					*p = '_';
+					*p = sep;
 				}
 				return;
 			}
@@ -200,27 +204,16 @@ c_specific_subst(char filename[])
 static void
 c_unicode_subst(char filename[])
 {
-	char *p;
+	unsigned char *p;
 
-	p = filename;
+	p = (unsigned char*)filename;
 	while (*p != 0x00) {
-		if (*p == -61) {
-			if (
-				u_ischarset(*p, C_CHARSET_A_MAJ) == TRUE ||
-				u_ischarset(*p, C_CHARSET_A_MIN) == TRUE
-			) {
-				printf(">>>>{%hhd}\n", *p);
-				printf(">>>>{%hhd}\n", *(p + 1));
+		if (*p == 0xc3) {
+			if (u_isucharset((unsigned char)*(p + 1), "\xa0\xb6") == TRUE) {
+				printf(">>>>{%hhx}\n", *p);
+				printf(">>>>{%hhx}\n", *(p + 1));
 				*p = 'a';
-				memmove(p + 1, p + 2, (strlen(p + 2) + 1) * sizeof(char));
-				c_unicode_subst(filename);
-			}
-			if (
-				u_ischarset(*p, C_CHARSET_O_MAJ) == TRUE ||
-				u_ischarset(*p, C_CHARSET_O_MIN) == TRUE
-			) {
-				*p = 'o';
-				memmove(p + 1, p + 2, (strlen(p + 2) + 1) * sizeof(char));
+				memmove(p + 1, p + 2, (strlen((const char*)p + 2) + 1) * sizeof(char));
 				c_unicode_subst(filename);
 			}
 		}
@@ -233,6 +226,7 @@ c_subst_current
 (char					new_fname[],
  const char				og_fname[],
  const bool_t			hyphen,
+ const bool_t			preserve,
  const unsigned char	cxx)
 {
 	unsigned char* p;
@@ -244,16 +238,17 @@ c_subst_current
 		p++;
 	}
 	c_ext_subst(new_fname, cxx);
-	c_num_prefix_subst(new_fname);
+	c_num_prefix_subst(new_fname, hyphen);
 	c_specific_subst(new_fname);
 	c_unicode_subst(new_fname);
-	c_classic_subst(new_fname, hyphen);
+	c_classic_subst(new_fname, hyphen, preserve);
 }
 
 struct lfiles_s*
 c_subst_filenames
 (struct lfiles_s*		og_head,
  const bool_t			hyphen,
+ const bool_t			preserve,
  const unsigned char	cxx)
 {
 	struct lfiles_s* dup_head;
@@ -265,7 +260,7 @@ c_subst_filenames
 	link = NULL;
 	origin = og_head;
 	while (origin != NULL) {
-		c_subst_current(tmp, origin->filename, hyphen, cxx);
+		c_subst_current(tmp, origin->filename, hyphen, preserve, cxx);
 		link = c_lfiles_new(tmp, origin->filetype);
 		if (link == NULL) {
 			u_dump_errno();
diff --git a/src/c_subst.h b/src/c_subst.h
index e21f72c..fb4f16a 100644
--- a/src/c_subst.h
+++ b/src/c_subst.h
@@ -56,7 +56,7 @@
 #define C_CHARSET_O_MAJ "\u00d2\u00d3\u00d4\u00d5\u00d6\u00d8"
 #define C_CHARSET_O_MIN "\u00f2\u00f3\u00f4\u00f5\u00f6\u00f8"
 
-struct lfiles_s* c_subst_filenames(struct lfiles_s*,
+struct lfiles_s* c_subst_filenames(struct lfiles_s*, const bool_t,
 	const bool_t, const unsigned char);
 
 #endif /* ifndef __C_SUBST_H__ */
diff --git a/src/c_unixize.c b/src/c_unixize.c
index 324d358..eac2eb3 100644
--- a/src/c_unixize.c
+++ b/src/c_unixize.c
@@ -49,6 +49,7 @@
 
 #include <dirent.h>
 #include <errno.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stddef.h>
 #include <string.h>
@@ -74,6 +75,7 @@ main
 	char** nargv;
 	static char subpath[MAXPATHLEN] = "";
 
+	setlocale(LC_ALL, "");
 	if (c_get_opts(&opts, argc, argv) == FALSE) {
 		return (0);
 	}
@@ -94,7 +96,7 @@ main
 	if (og_files == NULL) {
 		return (0);
 	}
-	new_files = c_subst_filenames(og_files, opts.hyphen, opts.cxx);
+	new_files = c_subst_filenames(og_files, opts.hyphen, opts.preserve, opts.cxx);
 	if (new_files == NULL) {
 		c_lfiles_clear(&og_files);
 		return (1);
diff --git a/src/c_unixize.h b/src/c_unixize.h
index 6765d4b..4917a66 100644
--- a/src/c_unixize.h
+++ b/src/c_unixize.h
@@ -59,6 +59,7 @@ struct opts_s {
 	bool_t	confirm;
 	bool_t	hidden;
 	bool_t	hyphen;
+	bool_t	preserve;
 	bool_t	pretend;
 	bool_t	recursive;
 	bool_t	verbose;
diff --git a/src/u_utils.c b/src/u_utils.c
index c5dc869..88f768c 100644
--- a/src/u_utils.c
+++ b/src/u_utils.c
@@ -114,6 +114,11 @@ u_get_extra_args(char args[], struct opts_s* opts)
 		null_char += 2;
 		i++;
 	}
+	if (opts->preserve == TRUE) {
+		memcpy((char*)args + (i * 2), "k:", 3 * sizeof(char));
+		null_char += 2;
+		i++;
+	}
 	if (opts->hyphen == TRUE) {
 		memcpy((char*)args + (i * 2), "n:", 3 * sizeof(char));
 		null_char += 2;
@@ -237,7 +242,7 @@ u_decrease_subpath(char subp[])
 
 bool_t
 u_ischarset
-(int		c,
+(const int	c,
  const char	cs[])
 {
 	char *p;
@@ -251,3 +256,20 @@ u_ischarset
 	}
 	return (FALSE);
 }
+
+/* TRUE if byte c is in the NUL-terminated set cs. Compared as unsigned
+ * char: via a signed plain char, bytes >= 0x80 could never equal c. */
+bool_t
+u_isucharset
+(const unsigned char	c,
+ const unsigned char	cs[])
+{
+	const unsigned char *p;
+
+	for (p = cs; *p != 0x00; p++) {
+		if (*p == c) {
+			return (TRUE);
+		}
+	}
+	return (FALSE);
+}
diff --git a/src/u_utils.h b/src/u_utils.h
index 1add802..9c04055 100644
--- a/src/u_utils.h
+++ b/src/u_utils.h
@@ -54,4 +54,5 @@ void	u_del_nargv(char** nargv);
 char**	u_get_nargv(struct opts_s*);
 void	u_increase_subpath(char[], const char[]);
 void	u_decrease_subpath(char[]);
-bool_t	u_ischarset(int, const char[]);
+bool_t	u_ischarset(const int, const char[]);
+bool_t	u_isucharset(const unsigned char, const unsigned char[]);
-- 
cgit v1.2.3