mirror of
				https://github.com/subsurface/subsurface.git
				synced 2025-02-19 22:16:15 +00:00 
			
		
		
		
	Metadata: rudimentary support for XMP metadataa in MP4-based videos
XMP is a media-metadata standard based on XML which may be used across a variety of media formats. Some video-processing software writes XMP data without updating the native metadata fields. Therefore, we should aim at reading XMP metadata and give priority of XMP data over native fields. Pros: - Support for *all* common media formats. Cons: - XML (complex, verbose, chaotic). - Does not even come close to fulfilling its promise of being well defined (see below). Implement a simple XMP-parser using libxml2. Connect the XMP-parser to the existing Quicktime/MP4 parser. First problem encountered: According to the spec, XMP data supposed to be put in the 'XMP_' atom. But for example exiftools instead writes an 'uuid' atom with a special 16-byte uid. Implement both, more options will probably follow. Second problem: two versions of recording the creation date were found 1) The content of a <exif:DateTimeOriginal> tag. 2) The xmp::CreateDate attribute of a <rdf:Description> tag. Here too, more versions are expected to surface and will have to be supported in due course (with an obvious priority problem). Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
This commit is contained in:
		
							parent
							
								
									0aab39b35d
								
							
						
					
					
						commit
						cc4f48be3f
					
				
					 4 changed files with 185 additions and 4 deletions
				
			
		
							
								
								
									
										138
									
								
								core/xmp_parser.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										138
									
								
								core/xmp_parser.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,138 @@ | |||
| #include "xmp_parser.h" | ||||
| #include "subsurface-string.h" | ||||
| 
 | ||||
| #include <libxml/parser.h> | ||||
| #include <libxml/tree.h> | ||||
| #include <cctype> | ||||
| 
 | ||||
| extern "C" timestamp_t utc_mktime(struct tm *tm); // declared in core/dive.h
 | ||||
| 
 | ||||
| static timestamp_t parse_xmp_date(const char *date) | ||||
| { | ||||
| 	// Format: "yyyy-mm-dd[Thh:mm[:ss[.ms]][-05:00]]"
 | ||||
| 	int year, month, day; | ||||
| 	if (sscanf(date, "%d-%d-%d", &year, &month, &day) != 3) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	int hours = 0, minutes = 0, seconds = 0, milliseconds = 0; | ||||
| 	int timezone = 0; | ||||
| 
 | ||||
| 	// Check for time part
 | ||||
| 	if ((date = strchr(date, 'T')) != nullptr) { | ||||
| 		++date; // Skip 'T'
 | ||||
| 		if (sscanf(date, "%d:%d:%d.%d", &hours, &minutes, &seconds, &milliseconds) < 2) | ||||
| 			return 0; | ||||
| 
 | ||||
| 		// Check for timezone part. Note that we simply ignore 'Z' as that
 | ||||
| 		// means no time zone
 | ||||
| 		while (*date && *date != '+' && *date != '-') | ||||
| 			++date; | ||||
| 		if (*date) { | ||||
| 			int sign = *date == '+' ? 1 : -1; | ||||
| 			int timezone_hours, timezone_minutes; | ||||
| 			++date; | ||||
| 			if (sscanf(date, "%d:%d", &timezone_hours, &timezone_minutes) != 2) | ||||
| 				return 0; | ||||
| 			timezone = sign * (timezone_hours * 60 + timezone_minutes) * 60; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// Round to seconds, since our timestamps are in seconds
 | ||||
| 	if (milliseconds >= 500) | ||||
| 		seconds += 1; | ||||
| 
 | ||||
| 	struct tm tm = { 0 }; | ||||
| 	tm.tm_year = year - 1900; | ||||
| 	tm.tm_mon = month - 1; | ||||
| 	tm.tm_mday = day; | ||||
| 	tm.tm_hour = hours; | ||||
| 	tm.tm_min = minutes; | ||||
| 	tm.tm_sec = seconds; | ||||
| 
 | ||||
| 	timestamp_t res = utc_mktime(&tm); | ||||
| 	res += timezone; | ||||
| 
 | ||||
| 	return res; | ||||
| } | ||||
| 
 | ||||
| static timestamp_t extract_timestamp_from_attributes(const xmlNode *node) | ||||
| { | ||||
| 	for (const xmlAttr *p = node->properties; p; p = p->next) { | ||||
| 		const xmlChar *ns = p->ns ? p->ns->prefix : nullptr; | ||||
| 
 | ||||
| 		// Check for xmp::CreateDate property
 | ||||
| 		if (!strcmp((const char *)ns, "xmp") && !strcmp((const char *)p->name, "CreateDate")) { | ||||
| 			// We only support a single property value
 | ||||
| 			if (!p->children || !p->children->content) | ||||
| 				return 0; | ||||
| 			const char *date = (const char *)p->children->content; | ||||
| 			return parse_xmp_date(date); | ||||
| 		} | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static timestamp_t extract_timestamp(const xmlNode *node) | ||||
| { | ||||
| 	// We use a private stack, so that we can return in one go without
 | ||||
| 	// having to unwind the call-stack. We only recurse to a fixed depth,
 | ||||
| 	// since the data we are interested in are at a shallow depth.
 | ||||
| 	// This can be increased on demand.
 | ||||
| 	static const int max_recursion_depth = 16; | ||||
| 	const xmlNode *stack[max_recursion_depth]; | ||||
| 	stack[0] = node; | ||||
| 	int stack_depth = 1; | ||||
| 
 | ||||
| 	while (stack_depth > 0) { | ||||
| 		const xmlNode *node = stack[stack_depth - 1]; | ||||
| 		// Parse attributes
 | ||||
| 		timestamp_t timestamp = extract_timestamp_from_attributes(node); | ||||
| 		if (timestamp) | ||||
| 			return timestamp; | ||||
| 
 | ||||
| 		// Parse content, if not blank node. Content can only be at the second level,
 | ||||
| 		// since it is always contained in a tag.
 | ||||
| 		// TODO: We have to cast node to pointer to non-const, since we're supporting
 | ||||
| 		// old libxml2 versions, where xmlIsBlankNode takes such a pointer. Remove
 | ||||
| 		// in due course.
 | ||||
| 		if (!xmlIsBlankNode((xmlNode *)node) && stack_depth >= 2) { | ||||
| 			const xmlNode *parent = stack[stack_depth - 2]; | ||||
| 			// If this is a text node and the parent node is exif:DateTimeOriginal, try to parse as date
 | ||||
| 			if (!node->ns && parent->ns && | ||||
| 			    same_string((const char *)parent->ns->prefix, "exif") && | ||||
| 			    same_string((const char *)parent->name, "DateTimeOriginal")) { | ||||
| 				const char *date = (const char *)node->content; | ||||
| 				timestamp_t res = parse_xmp_date(date); | ||||
| 				if(res) | ||||
| 					return res; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		// If there are sub-items and we haven't reached recursion depth, recurse
 | ||||
| 		if (node->children && stack_depth < max_recursion_depth) { | ||||
| 			stack[stack_depth++] = node->children; | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		// Advance stack to next node in this level
 | ||||
| 		while (stack_depth > 0) { | ||||
| 			if ((stack[stack_depth - 1] = stack[stack_depth - 1]->next) != nullptr) | ||||
| 				break; | ||||
| 			// No more nodes at this level -> go up a level.
 | ||||
| 			--stack_depth; | ||||
| 		} | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| timestamp_t parse_xmp(const char *data, size_t size) | ||||
| { | ||||
| 	const char *encoding = xmlGetCharEncodingName(XML_CHAR_ENCODING_UTF8); | ||||
| 	// TODO: What do we pass as URL-parameter?
 | ||||
| 	xmlDoc *doc = xmlReadMemory(data, size, "url", encoding, 0); | ||||
| 	if (!doc) | ||||
| 		return 0; | ||||
| 	timestamp_t res = extract_timestamp(xmlDocGetRootElement(doc)); | ||||
| 	xmlFreeDoc(doc); | ||||
| 	return res; | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue