/* Extracting a message. Accumulating the message list. Copyright (C) 2001-2018 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include #endif /* Specification. */ #include "xg-message.h" #include #include "c-strstr.h" #include "error-progname.h" #include "format.h" #include "read-catalog-abstract.h" #include "xalloc.h" #include "xerror.h" #include "xvasprintf.h" #include "xgettext.h" #include "gettext.h" #define _(str) gettext (str) #define CONVERT_STRING(string, lcontext) \ string = from_current_source_encoding (string, lcontext, pos->file_name, \ pos->line_number); /* Update the is_format[] flags depending on the information given in the context. */ static void set_format_flags_from_context (enum is_format is_format[NFORMATS], flag_context_ty context, const char *string, lex_pos_ty *pos, const char *pretty_msgstr) { size_t i; if (context.is_format1 != undecided || context.is_format2 != undecided || context.is_format3 != undecided) for (i = 0; i < NFORMATS; i++) { if (is_format[i] == undecided) { if (formatstring_parsers[i] == current_formatstring_parser1 && context.is_format1 != undecided) is_format[i] = (enum is_format) context.is_format1; if (formatstring_parsers[i] == current_formatstring_parser2 && context.is_format2 != undecided) is_format[i] = (enum is_format) context.is_format2; if (formatstring_parsers[i] == current_formatstring_parser3 && context.is_format3 != undecided) is_format[i] = (enum is_format) context.is_format3; } if (possible_format_p (is_format[i])) { struct formatstring_parser *parser = formatstring_parsers[i]; char *invalid_reason = NULL; void *descr = parser->parse (string, false, NULL, &invalid_reason); if (descr != NULL) parser->free (descr); else { /* The string is not a valid format string. */ if (is_format[i] != possible) { char buffer[21]; error_with_progname = false; if (pos->line_number == (size_t)(-1)) buffer[0] = '\0'; else sprintf (buffer, ":%ld", (long) pos->line_number); multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name, buffer), xasprintf (is_format[i] == yes_according_to_context ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n") : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"), pretty_msgstr, format_language_pretty[i], invalid_reason)); error_with_progname = true; } is_format[i] = impossible; free (invalid_reason); } } } } static void warn_format_string (enum is_format is_format[NFORMATS], const char *string, lex_pos_ty *pos, const char *pretty_msgstr) { if (possible_format_p (is_format[format_python]) && get_python_format_unnamed_arg_count (string) > 1) { char buffer[21]; error_with_progname = false; if (pos->line_number == (size_t)(-1)) buffer[0] = '\0'; else sprintf (buffer, ":%ld", (long) pos->line_number); multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name, buffer), xasprintf (_("\ '%s' format string with unnamed arguments cannot be properly localized:\n\ The translator cannot reorder the arguments.\n\ Please consider using a format string with named arguments,\n\ and a mapping instead of a tuple for the arguments.\n"), pretty_msgstr)); error_with_progname = true; } } message_ty * remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid, bool is_utf8, flag_context_ty context, lex_pos_ty *pos, const char *extracted_comment, refcounted_string_list_ty *comment, bool comment_is_utf8) { enum is_format is_format[NFORMATS]; struct argument_range range; enum is_wrap do_wrap; enum is_syntax_check do_syntax_check[NSYNTAXCHECKS]; message_ty *mp; char *msgstr; size_t i; /* See whether we shall exclude this message. */ if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL) { /* Tell the lexer to reset its comment buffer, so that the next message gets the correct comments. */ xgettext_comment_reset (); savable_comment_reset (); if (msgctxt != NULL) free (msgctxt); free (msgid); return NULL; } savable_comment_to_xgettext_comment (comment); for (i = 0; i < NFORMATS; i++) is_format[i] = undecided; range.min = -1; range.max = -1; do_wrap = undecided; for (i = 0; i < NSYNTAXCHECKS; i++) do_syntax_check[i] = undecided; if (!is_utf8) { if (msgctxt != NULL) CONVERT_STRING (msgctxt, lc_string); CONVERT_STRING (msgid, lc_string); } if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header) { char buffer[21]; error_with_progname = false; if (pos->line_number == (size_t)(-1)) buffer[0] = '\0'; else sprintf (buffer, ":%ld", (long) pos->line_number); multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name, buffer), xstrdup (_("\ Empty msgid. It is reserved by GNU gettext:\n\ gettext(\"\") returns the header entry with\n\ meta information, not the empty string.\n"))); error_with_progname = true; } /* See if we have seen this message before. */ mp = message_list_search (mlp, msgctxt, msgid); if (mp != NULL) { if (msgctxt != NULL) free (msgctxt); free (msgid); for (i = 0; i < NFORMATS; i++) is_format[i] = mp->is_format[i]; do_wrap = mp->do_wrap; for (i = 0; i < NSYNTAXCHECKS; i++) do_syntax_check[i] = mp->do_syntax_check[i]; } else { /* Construct the msgstr from the prefix and suffix, otherwise use the empty string. */ if (msgstr_prefix) msgstr = xasprintf ("%s%s%s", msgstr_prefix, msgid, msgstr_suffix); else msgstr = ""; /* Allocate a new message and append the message to the list. */ mp = message_alloc (msgctxt, msgid, NULL, msgstr, strlen (msgstr) + 1, pos); /* Do not free msgctxt and msgid. */ message_list_append (mlp, mp); } /* Determine whether the context specifies that the msgid is a format string. */ set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid"); /* Ask the lexer for the comments it has seen. */ { size_t nitems_before; size_t nitems_after; int j; bool add_all_remaining_comments; /* The string before the comment tag. For example, If "** TRANSLATORS:" is seen and the comment tag is "TRANSLATORS:", then comment_tag_prefix is set to "** ". */ const char *comment_tag_prefix = ""; size_t comment_tag_prefix_length = 0; nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0); if (extracted_comment != NULL) { char *copy = xstrdup (extracted_comment); char *rest; rest = copy; while (*rest != '\0') { char *newline = strchr (rest, '\n'); if (newline != NULL) { *newline = '\0'; message_comment_dot_append (mp, rest); rest = newline + 1; } else { message_comment_dot_append (mp, rest); break; } } free (copy); } add_all_remaining_comments = add_all_comments; for (j = 0; ; ++j) { const char *s = xgettext_comment (j); const char *t; if (s == NULL) break; if (!comment_is_utf8) CONVERT_STRING (s, lc_comment); /* To reduce the possibility of unwanted matches we do a two step match: the line must contain 'xgettext:' and one of the possible format description strings. */ if ((t = c_strstr (s, "xgettext:")) != NULL) { bool tmp_fuzzy; enum is_format tmp_format[NFORMATS]; struct argument_range tmp_range; enum is_wrap tmp_wrap; enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS]; bool interesting; t += strlen ("xgettext:"); po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range, &tmp_wrap, tmp_syntax_check); interesting = false; for (i = 0; i < NFORMATS; i++) if (tmp_format[i] != undecided) { is_format[i] = tmp_format[i]; interesting = true; } if (has_range_p (tmp_range)) { range = tmp_range; interesting = true; } if (tmp_wrap != undecided) { do_wrap = tmp_wrap; interesting = true; } for (i = 0; i < NSYNTAXCHECKS; i++) if (tmp_syntax_check[i] != undecided) { do_syntax_check[i] = tmp_syntax_check[i]; interesting = true; } /* If the "xgettext:" marker was followed by an interesting keyword, and we updated our is_format/do_wrap variables, we don't print the comment as a #. comment. */ if (interesting) continue; } if (!add_all_remaining_comments && comment_tag != NULL) { /* When the comment tag is seen, it drags in not only the line which it starts, but all remaining comment lines. */ if ((t = c_strstr (s, comment_tag)) != NULL) { add_all_remaining_comments = true; comment_tag_prefix = s; comment_tag_prefix_length = t - s; } } if (add_all_remaining_comments) { if (strncmp (s, comment_tag_prefix, comment_tag_prefix_length) == 0) s += comment_tag_prefix_length; message_comment_dot_append (mp, s); } } nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0); /* Don't add the comments if they are a repetition of the tail of the already present comments. This avoids unneeded duplication if the same message appears several times, each time with the same comment. */ if (nitems_before < nitems_after) { size_t added = nitems_after - nitems_before; if (added <= nitems_before) { bool repeated = true; for (i = 0; i < added; i++) if (strcmp (mp->comment_dot->item[nitems_before - added + i], mp->comment_dot->item[nitems_before + i]) != 0) { repeated = false; break; } if (repeated) { for (i = 0; i < added; i++) free ((char *) mp->comment_dot->item[nitems_before + i]); mp->comment_dot->nitems = nitems_before; } } } } /* If it is not already decided, through programmer comments, whether the msgid is a format string, examine the msgid. This is a heuristic. */ for (i = 0; i < NFORMATS; i++) { if (is_format[i] == undecided && (formatstring_parsers[i] == current_formatstring_parser1 || formatstring_parsers[i] == current_formatstring_parser2 || formatstring_parsers[i] == current_formatstring_parser3) /* But avoid redundancy: objc-format is stronger than c-format. */ && !(i == format_c && possible_format_p (is_format[format_objc])) && !(i == format_objc && possible_format_p (is_format[format_c])) /* Avoid flagging a string as c-format when it's known to be a qt-format or qt-plural-format or kde-format or boost-format string. */ && !(i == format_c && (possible_format_p (is_format[format_qt]) || possible_format_p (is_format[format_qt_plural]) || possible_format_p (is_format[format_kde]) || possible_format_p (is_format[format_kde_kuit]) || possible_format_p (is_format[format_boost]))) /* Avoid flagging a string as kde-format when it's known to be a kde-kuit-format string. */ && !(i == format_kde && possible_format_p (is_format[format_kde_kuit])) /* Avoid flagging a string as kde-kuit-format when it's known to be a kde-format string. Note that this relies on the fact that format_kde < format_kde_kuit, so a string will be marked as kde-format if both are undecided. */ && !(i == format_kde_kuit && possible_format_p (is_format[format_kde]))) { struct formatstring_parser *parser = formatstring_parsers[i]; char *invalid_reason = NULL; void *descr = parser->parse (mp->msgid, false, NULL, &invalid_reason); if (descr != NULL) { /* msgid is a valid format string. We mark only those msgids as format strings which contain at least one format directive and thus are format strings with a high probability. We don't mark strings without directives as format strings, because that would force the programmer to add "xgettext: no-c-format" anywhere where a translator wishes to use a percent sign. So, the msgfmt checking will not be perfect. Oh well. */ if (parser->get_number_of_directives (descr) > 0 && !(parser->is_unlikely_intentional != NULL && parser->is_unlikely_intentional (descr))) is_format[i] = possible; parser->free (descr); } else { /* msgid is not a valid format string. */ is_format[i] = impossible; free (invalid_reason); } } mp->is_format[i] = is_format[i]; } if (has_range_p (range)) { if (has_range_p (mp->range)) { if (range.min < mp->range.min) mp->range.min = range.min; if (range.max > mp->range.max) mp->range.max = range.max; } else mp->range = range; } mp->do_wrap = do_wrap == no ? no : yes; /* By default we wrap. */ for (i = 0; i < NSYNTAXCHECKS; i++) { if (do_syntax_check[i] == undecided) do_syntax_check[i] = default_syntax_check[i] == yes ? yes : no; mp->do_syntax_check[i] = do_syntax_check[i]; } /* Warn about the use of non-reorderable format strings when the programming language also provides reorderable format strings. */ warn_format_string (is_format, mp->msgid, pos, "msgid"); /* Remember where we saw this msgid. */ message_comment_filepos (mp, pos->file_name, pos->line_number); /* Tell the lexer to reset its comment buffer, so that the next message gets the correct comments. */ xgettext_comment_reset (); savable_comment_reset (); return mp; } void remember_a_message_plural (message_ty *mp, char *string, bool is_utf8, flag_context_ty context, lex_pos_ty *pos, refcounted_string_list_ty *comment, bool comment_is_utf8) { char *msgid_plural; char *msgstr1; size_t msgstr1_len; char *msgstr; size_t i; msgid_plural = string; savable_comment_to_xgettext_comment (comment); if (!is_utf8) CONVERT_STRING (msgid_plural, lc_string); /* See if the message is already a plural message. */ if (mp->msgid_plural == NULL) { mp->msgid_plural = msgid_plural; /* Construct the first plural form from the prefix and suffix, otherwise use the empty string. The translator will have to provide additional plural forms. */ if (msgstr_prefix) msgstr1 = xasprintf ("%s%s%s", msgstr_prefix, msgid_plural, msgstr_suffix); else msgstr1 = ""; msgstr1_len = strlen (msgstr1) + 1; msgstr = XNMALLOC (mp->msgstr_len + msgstr1_len, char); memcpy (msgstr, mp->msgstr, mp->msgstr_len); memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len); mp->msgstr = msgstr; mp->msgstr_len = mp->msgstr_len + msgstr1_len; if (msgstr_prefix) free (msgstr1); /* Determine whether the context specifies that the msgid_plural is a format string. */ set_format_flags_from_context (mp->is_format, context, mp->msgid_plural, pos, "msgid_plural"); /* If it is not already decided, through programmer comments or the msgid, whether the msgid is a format string, examine the msgid_plural. This is a heuristic. */ for (i = 0; i < NFORMATS; i++) if ((formatstring_parsers[i] == current_formatstring_parser1 || formatstring_parsers[i] == current_formatstring_parser2 || formatstring_parsers[i] == current_formatstring_parser3) && (mp->is_format[i] == undecided || mp->is_format[i] == possible) /* But avoid redundancy: objc-format is stronger than c-format. */ && !(i == format_c && possible_format_p (mp->is_format[format_objc])) && !(i == format_objc && possible_format_p (mp->is_format[format_c])) /* Avoid flagging a string as c-format when it's known to be a qt-format or qt-plural-format or boost-format string. */ && !(i == format_c && (possible_format_p (mp->is_format[format_qt]) || possible_format_p (mp->is_format[format_qt_plural]) || possible_format_p (mp->is_format[format_kde]) || possible_format_p (mp->is_format[format_kde_kuit]) || possible_format_p (mp->is_format[format_boost]))) /* Avoid flagging a string as kde-format when it's known to be a kde-kuit-format string. */ && !(i == format_kde && possible_format_p (mp->is_format[format_kde_kuit])) /* Avoid flagging a string as kde-kuit-format when it's known to be a kde-format string. Note that this relies on the fact that format_kde < format_kde_kuit, so a string will be marked as kde-format if both are undecided. */ && !(i == format_kde_kuit && possible_format_p (mp->is_format[format_kde]))) { struct formatstring_parser *parser = formatstring_parsers[i]; char *invalid_reason = NULL; void *descr = parser->parse (mp->msgid_plural, false, NULL, &invalid_reason); if (descr != NULL) { /* Same heuristic as in remember_a_message. */ if (parser->get_number_of_directives (descr) > 0 && !(parser->is_unlikely_intentional != NULL && parser->is_unlikely_intentional (descr))) mp->is_format[i] = possible; parser->free (descr); } else { /* msgid_plural is not a valid format string. */ mp->is_format[i] = impossible; free (invalid_reason); } } /* Warn about the use of non-reorderable format strings when the programming language also provides reorderable format strings. */ warn_format_string (mp->is_format, mp->msgid_plural, pos, "msgid_plural"); } else free (msgid_plural); /* Tell the lexer to reset its comment buffer, so that the next message gets the correct comments. */ xgettext_comment_reset (); savable_comment_reset (); }